千家信息网

hive中怎么自定义函数

发表于:2024-10-22 作者:千家信息网编辑
千家信息网最后更新 2024年10月22日,hive中怎么自定义函数,很多新手对此不是很清楚,为了帮助大家解决这个难题,下面小编将为大家详细讲解,有这方面需求的人可以来学习下,希望你能有所收获。create table tab_array(a
千家信息网最后更新 2024年10月22日hive中怎么自定义函数

hive中怎么自定义函数,很多新手对此不是很清楚,为了帮助大家解决这个难题,下面小编将为大家详细讲解,有这方面需求的人可以来学习下,希望你能有所收获。

-- Table with two array columns, loaded from tab-separated text in which the
-- elements of each array are separated by commas.
-- NOTE(review): the element types were missing from the published snippet;
-- string/int are inferred from the sample row and should be confirmed.
create table tab_array (
    a array<string>,
    b array<int>
)
row format delimited
fields terminated by '\t'
collection items terminated by ',';
abc,helloworl,itcats 213123214,432312321
-- Array elements are addressed by zero-based index: a[0] is the first element.
select
    a[0]
from tab_array;

-- Table with a map column, loaded from tab-separated text.
-- Map entries are separated by ';' and each key is separated from its value
-- by ':' (e.g. age:18;size:36A;addr:usa).
-- NOTE(review): the key/value types were missing from the published snippet;
-- string/string matches the sample rows and the later map('name',name,'ip',ip)
-- insert, but should be confirmed.
create table tab_map (
    name string,
    info map<string, string>
)
row format delimited
fields terminated by '\t'
collection items terminated by ';'
map keys terminated by ':';
实例数据
fengjie age:18;size:36A;addr:usa;
furong age:28;size:39C;addr:beijing;weight:90KG
-- Replace the contents of tab_map with the rows of a local text file.
load data local inpath '/home/hadoop/hivetemp/tab_map.txt' overwrite into table tab_map;

-- Append rows built from tab_ext, packing the name and ip columns into the map.
insert into table tab_map
select
    name,
    map('name', name, 'ip', ip)
from tab_ext;

-- Table with a struct column, loaded from tab-separated text in which the
-- struct fields are separated by commas.
-- NOTE(review): the struct field list was missing from the published snippet;
-- the field names come from the named_struct('age', ..., 'tel', ..., 'addr', ...)
-- insert that follows, and the field types are assumptions to be confirmed.
create table tab_struct (
    name string,
    info struct<age:int, tel:string, addr:string>
)
row format delimited
fields terminated by '\t'
collection items terminated by ',';
-- Replace the contents of tab_struct with the rows of a local text file.
load data local inpath '/home/hadoop/hivetemp/tab_struct.txt' overwrite into table tab_struct;

-- Append rows built from tab_ext, packing id, name and country into the struct.
-- The struct goes into tab_struct (tab_map holds a map column, not a struct),
-- and the source is tab_ext because tab_struct itself has no id/country columns.
insert into table tab_struct
select
    name,
    named_struct('age', id, 'tel', name, 'addr', country)
from tab_ext;

cli shell
# Run one query in silent mode (-S) and redirect the result rows to a local file.
# count(*) next to a plain column requires that column in GROUP BY.
hive -S -e 'select country, count(*) from tab_ext group by country' > ~/soft/e.txt
-- "sort by" orders rows within each reducer; "order by" is the clause that
-- guarantees a total order over the whole result.
select *
from tab_ext
sort by id desc
limit 5;

-- Pair each ip in tab_ext with the books recorded for the same name.
select
    a.ip,
    b.book
from tab_ext a
join tab_ip_book b
    on (a.name = b.name);

package cn.itcas.hive.udf;

import java.util.HashMap;

import org.apache.hadoop.hive.ql.exec.UDF;

public class PhoneToArea extends UDF {


private static HashMap map=new HashMap();
static{
map.put("136","beijing");
map.put("137","tianjing");
map.put("138","nanjing");
map.put("139","shanghai");
map.put("188","tokyo");
}

//transform phoneNo To specificAreaName
public String evaluate(String phoneNum){
String area=map.get(phoneNum.substring(0, 3));
return area==null?"huoxing":area;
}
//sum the flow
public int evaluate(int upFlow,int downFlow){

return upFlow+downFlow;
}

}
//方法必须是public,否则调用不了这个方法

打成hiveutil.jar
[root@hadoop04 hive-0.12.0]# mv ~/Desktop/hiveutil.jar ~/soft
hive> add jar /root/soft/hiveutil.jar
> ;
Added /root/soft/hiveutil.jar to class path
Added resource: /root/soft/hiveutil.jar

hive> create temporary function areasum as 'cn.itcas.hive.udf.PhoneToArea'
> ;
OK
Time taken: 0.006 seconds
[root@hadoop04 soft]# vi flow.data
13198530807,220,300
13233231222,330,450
13333231222,330,450
13433231222,330,450
13533231222,330,450
13633231222,330,450
13833231222,330,450
13933231222,330,450
18633231222,330,450
13233231222,330,450


hive> create table t_flow(phonenbr string ,upflow int,downflow int)
> row format delimited
> fields terminated by ',';
OK
Time taken: 14.112 seconds
hive> load data local inpath '/root/soft/flow.data' into table t_flow;
Copying data from file:/root/soft/flow.data
Copying file: file:/root/soft/flow.data
Loading data to table default.t_flow
Table default.t_flow stats: [num_partitions: 0, num_files: 1, num_rows: 0, total_size: 201, raw_data_size: 0]
OK
Time taken: 1.509 seconds
hive> select * from t_flow;
OK
13198530807 220 300
13233231222 330 450
13333231222 330 450
13433231222 330 450
13533231222 330 450
13633231222 330 450
13833231222 330 450
13933231222 330 450
18633231222 330 450
13233231222 330 450
NULL NULL
Time taken: 0.76 seconds, Fetched: 11 row(s)

hive> select phonenbr,areasum(phonenbr),areasum(upflow,downflow) from t_flow;
(若执行出错,先用 quit 退出 hive,重新进入后再运行)
13198530807 huoxing 520
13233231222 huoxing 780
13333231222 huoxing 780
13433231222 huoxing 780
13533231222 huoxing 780
13633231222 beijing 780
13833231222 nanjing 780
13933231222 shanghai 780
18633231222 huoxing 780
13233231222 huoxing 780


看完上述内容是否对您有帮助呢?如果还想对相关知识有进一步的了解或阅读更多相关文章,请关注行业资讯频道,感谢您对千家信息网的支持。

0