千家信息网

hive ETL业绩报表sql怎么写

发表于:2025-01-31 作者:千家信息网编辑
千家信息网最后更新 2025年01月31日,这篇文章主要讲解了"hive ETL业绩报表sql怎么写",文中的讲解内容简单清晰,易于学习与理解,下面请大家跟着小编的思路慢慢深入,一起来研究和学习"hive ETL业绩报表sql怎么写"吧!--
千家信息网最后更新 2025年01月31日hive ETL业绩报表sql怎么写

这篇文章主要讲解了"hive ETL业绩报表sql怎么写",文中的讲解内容简单清晰,易于学习与理解,下面请大家跟着小编的思路慢慢深入,一起来研究和学习"hive ETL业绩报表sql怎么写"吧!

-- case4 ----========== rates ==========--app0    1app1    2app2    2app3    3app4    3app5    3app6    5app7    5app8    5app9    5CREATE EXTERNAL TABLE rates (    app_name    STRING   , star_rates  STRING)ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'LOCATION '/tmp/db/rates';create table app_ranks as  select app_name as app       , star_rates as stars       , NTILE(3) OVER (ORDER BY star_rates DESC) as nt       , row_number() OVER (ORDER BY star_rates DESC) as rn       , rank() OVER (ORDER BY star_rates DESC) as rk       , dense_rank() OVER (ORDER BY star_rates DESC) as drk       , CUME_DIST() OVER (ORDER BY star_rates) as cd       , PERCENT_RANK() OVER (ORDER BY star_rates) as pr  from rates  order by stars desc;select app, stars, cd, sum(cd) OVER (PARTITION BY stars ORDER BY rn ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) from app_ranks;select app, stars, cd, sum(cd) OVER (PARTITION BY stars ORDER BY rn ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) from app_ranks;select app, stars, cd, sum(cd) OVER (PARTITION BY stars ORDER BY rn ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING) from app_ranks;select app, stars, rn, lead(rn) OVER (PARTITION BY stars ORDER BY rn), lag(rn) OVER (PARTITION BY stars ORDER BY rn) from app_ranks;--========== visitors ==========--d001    201301    101d002    201301    102d003    201301    103d001    201302    111d002    201302    112d003    201302    113d001    201303    121d002    201303    122d003    201303    123d001    201304    131d002    201304    132d003    201304    133d001    201305    141d002    201305    142d003    201305    143d001    201306    151d002    201306    152d003    201306    153d001    201307    201d002    201307    202d003    201307    203d001    201308    211d002    201308    212d003    201308    213d001    201309    221d002    201309    222d003    201309    223d001    201310    231d002    201310    232d003    201310    233d001    201311    241d002    201311    242d003    201311    243d001    201312    301d002    201312    302d003    201312    303d001    201401    301d002    201401    302d003    201401    303d001    201402    211d002    201402    212d003    201402    213d001    201403    271d002    201403    272d003    201403    273d001    201404    331d002    201404    332d003    201404    333d001    201405    351d002    201405    352d003    201405    353CREATE EXTERNAL TABLE visitors (    domain  STRING   , month   STRING  , visitor STRING)ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'LOCATION '/tmp/db/visitors';select * from visitors where domain = 'd001';select domain     , month     , visitor     , first_value(visitor) OVER (PARTITION BY domain ORDER BY month DESC)     , last_value(visitor) OVER (PARTITION BY domain ORDER BY month DESC)     , lead(visitor) OVER (PARTITION BY domain ORDER BY month DESC)     , lag(visitor) OVER (PARTITION BY domain ORDER BY month DESC)from visitorswhere domain = 'd001';select domain     , month     , visitor     , first_value(visitor) OVER (PARTITION BY domain ORDER BY month DESC)     , last_value(visitor) OVER (PARTITION BY domain ORDER BY month DESC)     , lead(visitor, 1, 0) OVER (PARTITION BY domain ORDER BY month DESC)     , lag(visitor, 1, 0) OVER (PARTITION BY domain ORDER BY month DESC)     , lead(visitor, 12, 0) OVER (PARTITION BY domain ORDER BY month DESC)     , lag(visitor, 12, 0) OVER (PARTITION BY domain ORDER BY month DESC)from visitorswhere domain = 'd001';create table visitors_report as  select domain       , month       , visitor       , lead(visitor, 1, 0) OVER (PARTITION BY domain ORDER BY month DESC)            as last_mon       , visitor - lead(visitor, 1, 0) OVER (PARTITION BY domain ORDER BY month DESC)  as delta_mon       , lead(visitor, 12, 0) OVER (PARTITION BY domain ORDER BY month DESC)           as last_year       , visitor - lead(visitor, 12, 0) OVER (PARTITION BY domain ORDER BY month DESC) as delta_year  from visitors;select * from visitors_report where domain = 'd001' and month > '2014';select month     , domain     , visitor     , last_mon     , last_yearfrom visitors_reportwhere (domain = 'd001' or domain = 'd002') and month > '2014'order by month desc, domain asclimit 100;select month     , domain     , visitor     , max(visitor) OVER (PARTITION BY month) as max_visitors     , min(visitor) OVER (PARTITION BY month) as min_visitorsfrom visitorswhere month > '2014'order by month desc, domain asc;select *from (select month     , domain     , visitor     , max(visitor) OVER (PARTITION BY domain ORDER BY month DESC ROWS BETWEEN CURRENT ROW AND 12 FOLLOWING) as max_visitors_last_12_mon     , min(visitor) OVER (PARTITION BY domain ORDER BY month DESC ROWS BETWEEN CURRENT ROW AND 12 FOLLOWING) as min_visitors_last_12_monfrom visitors) vwhere month > '20131'order by month desc, domain asc;

感谢各位的阅读,以上就是"hive ETL业绩报表sql怎么写"的内容了,经过本文的学习后,相信大家对hive ETL业绩报表sql怎么写这一问题有了更深刻的体会,具体使用情况还需要大家实践验证。这里是,小编将为大家推送更多相关知识点的文章,欢迎关注!

0