doris如何通过定时任务实现scd2

Viewed 8

我有一个维度表如下,尝试使用定时任务,但是报错显示不支持以下sql语句,请问基于doris如何定时实现以下功能呢:

CREATE VIEW vw_ods_location AS
SELECT
p.id AS province_id,
c.id AS city_id,
d.id AS district_id,
p.full_name AS province_full_name,
c.full_name AS city_full_name,
d.name AS district_name
FROM ods_province p
JOIN ods_city c
ON c.province_id = p.id
LEFT JOIN ods_district d
ON d.province_id = p.id
AND d.city_id = c.id;

-- 把所有当前记录中,与最新来源中同一业务键但字段发生变化的那批,置为历史
UPDATE dim_location
SET
is_current = 0,
end_date = CURRENT_TIMESTAMP()
WHERE
is_current = 1
AND EXISTS (
SELECT 1
FROM vw_ods_location src
WHERE
src.province_id = dim_location.province_id
AND src.city_id = dim_location.city_id
AND src.district_id = dim_location.district_id
-- 任意一个名称字段不相同,就说明要打历史
AND (
src.province_full_name <> dim_location.province_full_name
OR src.city_full_name <> dim_location.city_full_name
OR src.district_name <> dim_location.district_name
)
);

INSERT INTO dim_location (
province_id,
city_id,
district_id,
province_full_name,
city_full_name,
district_name,
start_date,
end_date,
is_current
)
SELECT
src.province_id,
src.city_id,
src.district_id,
src.province_full_name,
src.city_full_name,
src.district_name,
CURRENT_TIMESTAMP() AS start_date,
'9999-12-31 23:59:59' AS end_date,
1 AS is_current
FROM
vw_ods_location src
LEFT JOIN
dim_location tgt
ON tgt.province_id = src.province_id
AND tgt.city_id = src.city_id
AND tgt.district_id = src.district_id
AND tgt.is_current = 1
WHERE
-- 如果在 dim_location 中没有“当前”行,就插入(全新)
tgt.province_id IS NULL

-- 或者即便存在“当前”行,但名称字段已经变化,也插入新版本

OR (
tgt.province_full_name <> src.province_full_name
OR tgt.city_full_name <> src.city_full_name
OR tgt.district_name <> src.district_name
);

0 Answers