Doris2.0物化视图使用疑问

Viewed 9
  1. Base表:
    *************************** 1. row ***************************
    Table: dnsbeat_metric
    Create Table: CREATE TABLE dnsbeat_metric (
    req_starttime datetime(3) NULL,
    event_id bigint(20) NULL,
    req_id bigint(20) NULL,
    node_ip varchar(*) NULL,
    a_count bigint(20) NULL,
    a_dimhash text NULL,
    aaaa_count bigint(20) NULL,
    aaaa_dimhash text NULL,
    an_count bigint(20) NULL,
    an_list text NULL,
    ar_a_count bigint(20) NULL,
    ar_a_dimhash text NULL,
    ar_aaaa_count bigint(20) NULL,
    ar_aaaa_dimhash text NULL,
    ar_count bigint(20) NULL,
    ar_list json NULL,
    cname_count bigint(20) NULL,
    cname_dimhash text NULL,
    delta double NULL,
    flags text NULL,
    msg text NULL,
    node_name text NULL,
    ns_count bigint(20) NULL,
    ns_dimhash text NULL,
    ns_list json NULL,
    opcode bigint(20) NULL,
    province text NULL,
    qd_count bigint(20) NULL,
    req_dns_name text NULL,
    req_dns text NULL,
    req_domain text NULL,
    req_level bigint(20) NULL,
    req_qclass bigint(20) NULL,
    req_qtype bigint(20) NULL,
    req_timeout bigint(20) NULL,
    retry bigint(20) NULL,
    status bigint(20) NULL,
    version text NULL,
    regionname varchar(256) NULL COMMENT '区域名称,区分省份和全国',
    tasktype bigint(20) NULL COMMENT '1:顶级域;2:二级域;3:接口精细域名;4:top1w域名'
    ) ENGINE=OLAP
    UNIQUE KEY(req_starttime, event_id, req_id, node_ip)
    COMMENT 'dns.dnsbeat_metric'
    PARTITION BY RANGE(req_starttime)()
    DISTRIBUTED BY HASH(node_ip) BUCKETS AUTO
    PROPERTIES (
    "replication_allocation" = "tag.location.default: 3",
    "is_being_synced" = "false",
    "dynamic_partition.enable" = "true",
    "dynamic_partition.time_unit" = "DAY",
    "dynamic_partition.time_zone" = "Asia/Shanghai",
    "dynamic_partition.start" = "-730",
    "dynamic_partition.end" = "7",
    "dynamic_partition.prefix" = "p",
    "dynamic_partition.replication_allocation" = "tag.location.default: 3",
    "dynamic_partition.buckets" = "10",
    "dynamic_partition.create_history_partition" = "true",
    "dynamic_partition.history_partition_num" = "-1",
    "dynamic_partition.hot_partition_num" = "0",
    "dynamic_partition.reserved_history_periods" = "NULL",
    "dynamic_partition.storage_policy" = "",
    "storage_format" = "V2",
    "enable_unique_key_merge_on_write" = "true",
    "light_schema_change" = "true",
    "function_column.sequence_col" = "event_id",
    "disable_auto_compaction" = "false",
    "enable_single_replica_compaction" = "false"
    );

  2. 根据Base表最初写的普通视图:
    CREATE VIEW dnsbeat_metric_aggregate_view COMMENT 'VIEW' AS
    SELECT
    minute_floor ( req_starttime ) AS req_time,
    province AS province,
    node_name AS node_name,
    node_ip AS node_ip,
    req_level AS req_level,
    req_domain AS req_domain,
    req_dns AS req_dns,
    req_dns_name AS req_dns_name,
    regionname AS regionname,
    avg( delta ) AS total_avg_delta,
    avg( CASE WHEN status >= 0 THEN delta ELSE NULL END ) AS ok_avg_delta,
    avg( CASE WHEN status = - 1 THEN delta ELSE NULL END ) AS err_avg_delta,
    count( CASE WHEN delta >= 0 AND delta < 1000 THEN 1 ELSE NULL END ) AS count_delta0,
    count( CASE WHEN delta >= 1000 AND delta < 2000 THEN 1 ELSE NULL END ) AS count_delta1,
    count( CASE WHEN delta >= 2000 AND delta < 3000 THEN 1 ELSE NULL END ) AS count_delta2,
    count( CASE WHEN delta >= 3000 AND delta < 4000 THEN 1 ELSE NULL END ) AS count_delta3,
    count( CASE WHEN delta >= 4000 AND delta < 5000 THEN 1 ELSE NULL END ) AS count_delta4,
    count( status ) AS req_count,
    count( CASE WHEN status = - 1 THEN 1 ELSE NULL END ) AS count_err,
    count( CASE WHEN status = 0 THEN 1 ELSE NULL END ) AS count_code0,
    count( CASE WHEN status = 1 THEN 1 ELSE NULL END ) AS count_code1,
    count( CASE WHEN status = 2 THEN 1 ELSE NULL END ) AS count_code2,
    count( CASE WHEN status = 3 THEN 1 ELSE NULL END ) AS count_code3,
    count( CASE WHEN status = 4 THEN 1 ELSE NULL END ) AS count_code4,
    count( CASE WHEN status = 5 THEN 1 ELSE NULL END ) AS count_code5,
    count( CASE WHEN status BETWEEN - 1 AND 5 THEN NULL ELSE 1 END ) AS count_other
    FROM
    default_cluster:dns.dnsbeat_metric
    WHERE
    req_starttime >= CAST( CURRENT_DATE () - INTERVAL 7 DAY AS DATETIMEV2 (3))
    GROUP BY
    req_time,
    province,
    node_name,
    node_ip,
    req_level,
    req_domain,
    req_dns,
    req_dns_name,
    regionname;

疑问:我的业务场景是要查询最小粒度最近五分钟的数据,以及查询最近6小时的数据,物化视图该如何实现呢?物化视图是否应该每五分钟刷新一次,如果持续刷新会不会有瓶颈?
备注:因近期对业务进行了更新导致数据量剧增,每天至少6E数据,当前普通视图查询非常之慢。
麻烦各位老师指点下。

0 Answers