Issue creating materalized view on hudi

mysql> CREATE MATERIALIZED VIEW question3_mv
    -> DISTRIBUTED BY HASH(`BehaviorType`)
    -> REFRESH ASYNC START('2022-09-01 10:00:00') EVERY (interval 1 day)
    -> as select
    ->   log.BehaviorType,
    ->   count(log.BehaviorType)
    -> from
    ->   (
    ->     select
    ->       ItemID,
    ->       UserID,
    ->       window_funnel(
    ->         1800,
    ->         timestamp,
    ->         0,
    ->         [BehaviorType = 'pv' ,
    ->         BehaviorType = 'buy' ]
    ->       ) as level
    ->     from
    ->       user_behavior
    ->     where timestamp >= '2017-12-02 00:00:00'
    ->         and timestamp <= '2017-12-02 23:59:59'
    ->     group by
    ->       ItemID,
    ->       UserID
    ->   ) as list
    ->   left join (
    ->     select
    ->       UserID,
    ->       array_agg(BehaviorType) as BehaviorType
    ->     from
    ->       user_behavior
    ->     where
    ->       ItemID = 59883
    ->       and timestamp >= '2017-12-02 00:00:00'
    ->       and timestamp <= '2017-12-02 23:59:59'
    ->     group by
    ->       UserID
    ->   ) as log on list.UserID = log.UserID
    -> where
    ->   list.ItemID = 59883
    ->   and list.level = 1
    -> group by
    ->   log.BehaviorType
    -> order by
    ->   count(BehaviorType) desc;
ERROR 1064 (HY000): Getting analyzing error at line 1, column 25. Detail message: Can not find database:hudi_ecommerce.
mysql> show databases;
+--------------------+
| Database           |
+--------------------+
| default            |
| hudi_ecommerce     |
| information_schema |
+--------------------+
3 rows in set (0.01 sec)

There is a PR to fix the message. The answer is that you can only create a MV in the StarRocks’ internal catalog and refer to hudi/iceberg and other external catalog tables.

mysql> CREATE MATERIALIZED VIEW question3_mv
    -> DISTRIBUTED BY HASH(ItemID)
    -> REFRESH ASYNC START('2022-09-01 10:00:00') EVERY (interval 1 day)
    -> as select
    ->   log.BehaviorType,
    ->   count(log.BehaviorType)
    -> from
    ->   (
    ->     select
    ->       ItemID,
    ->       UserID,
    ->       window_funnel(
    ->         1800,
    ->         timestamp,
    ->         0,
    ->         [BehaviorType = 'pv' ,
    ->         BehaviorType = 'buy' ]
    ->       ) as level
    ->     from
    ->       hudi_catalog_hms.hudi_ecommerce.user_behavior
    ->     where timestamp >= '2017-12-02 00:00:00'
    ->         and timestamp <= '2017-12-02 23:59:59'
    ->     group by
    ->       ItemID,
    ->       UserID
  ) as list
  left join (
    select
      UserID,
      array_agg(BehaviorType) as BehaviorType
    from
      hudi_catalog_hms.hudi_ecommerce.user_behavior
    where
      ItemID = 59883
      and timestamp >= '2017-12-02 00:00:00'
      and timestamp <= '2017-12-02 23:59:59'
    group by
      UserID
  ) as log on list.UserID = log.UserID
where
  list.ItemID = 59883
  and list.level = 1
group by
  log.BehaviorType
order by
  count(BehaviorType) desc;    ->   ) as list
    ->   left join (
    ->     select
    ->       UserID,
    ->       array_agg(BehaviorType) as BehaviorType
    ->     from
    ->       hudi_catalog_hms.hudi_ecommerce.user_behavior
    ->     where
    ->       ItemID = 59883
    ->       and timestamp >= '2017-12-02 00:00:00'
    ->       and timestamp <= '2017-12-02 23:59:59'
    ->     group by
    ->       UserID
    ->   ) as log on list.UserID = log.UserID
    -> where
    ->   list.ItemID = 59883
    ->   and list.level = 1
    -> group by
    ->   log.BehaviorType
    -> order by
    ->   count(BehaviorType) desc;
ERROR 1064 (HY000): Getting analyzing error. Detail message: Distribution column(ItemID) doesn't exist..
mysql> CREATE MATERIALIZED VIEW question3_mv
    -> DISTRIBUTED BY HASH(hudi_catalog_hms.hudi_ecommerce.user_behavior.ItemID)
    -> REFRESH ASYNC START('2022-09-01 10:00:00') EVERY (interval 1 day)
    -> as select
    ->   log.BehaviorType,
    ->   count(log.BehaviorType)
    -> from
    ->   (
    ->     select
    ->       ItemID,
    ->       UserID,
    ->       window_funnel(
    ->         1800,
    ->         timestamp,
    ->         0,
    ->         [BehaviorType = 'pv' ,
    ->         BehaviorType = 'buy' ]
    ->       ) as level
    ->     from
    ->       hudi_catalog_hms.hudi_ecommerce.user_behavior
    ->     where timestamp >= '2017-12-02 00:00:00'
    ->         and timestamp <= '2017-12-02 23:59:59'
    ->     group by
    ->       ItemID,
    ->       UserID
    ->   ) as list
    ->   left join (
    ->     select
    ->       UserID,
    ->       array_agg(BehaviorType) as BehaviorType
    ->     from
    ->       hudi_catalog_hms.hudi_ecommerce.user_behavior
    ->     where
    ->       ItemID = 59883
    ->       and timestamp >= '2017-12-02 00:00:00'
    ->       and timestamp <= '2017-12-02 23:59:59'
    ->     group by
    ->       UserID
    ->   ) as log on list.UserID = log.UserID
    -> where
    ->   list.ItemID = 59883
    ->   and list.level = 1
    -> group by
    ->   log.BehaviorType
    -> order by
    ->   count(BehaviorType) desc;
ERROR 1064 (HY000): Getting syntax error at line 2, column 36. Detail message: No viable statement for input 'DISTRIBUTED BY HASH(hudi_catalog_hms.'.
mysql>