hive中怎么使用udaf函数求中位数
发表于:2024-11-19 作者:千家信息网编辑
千家信息网最后更新 2024年11月19日,hive中怎么使用udaf函数求中位数,针对这个问题,这篇文章详细介绍了相对应的分析和解答,希望可以帮助更多想解决这个问题的小伙伴找到更简单易行的方法。看下中位数定义:MEDIAN 中位数(一组数据按
千家信息网最后更新 2024年11月19日hive中怎么使用udaf函数求中位数
hive中怎么使用udaf函数求中位数,针对这个问题,这篇文章详细介绍了相对应的分析和解答,希望可以帮助更多想解决这个问题的小伙伴找到更简单易行的方法。
看下中位数定义:
MEDIAN 中位数(一组数据按从小到大的顺序依次排列,处在中间位置的一个数或最中间两个数据的平均数)
写成genericUDAF的形式
例如：1 2 3 4 的中位数为 (2+3)/2 = 2.5；1 2 3 的中位数为 2。
代码如下
package org.apache.hadoop.hive.ql.udf.generic;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.util.StringUtils;

/**
 * Hive UDAF resolver for {@code median(x)}.
 *
 * <p>The median is the middle value of the sorted non-null inputs, or the mean of the two
 * middle values when the number of inputs is even. The result is always a double; a group
 * with no non-null inputs yields NULL.
 *
 * <p>Every value of a group is kept in memory until the group is finished, and the partial
 * result sent between aggregation stages is the full list of values collected so far.
 */
@Description(name = "median", value = "_FUNC_(x) return the median number of a number array. \n"
    + "eg: median(x)")
public class GenericUDAFMedian extends AbstractGenericUDAFResolver {

  static final Log LOG = LogFactory.getLog(GenericUDAFMedian.class.getName());

  /**
   * Picks the evaluator matching the argument type.
   *
   * @param parameters type information of the call's arguments; exactly one is accepted
   * @return the int evaluator for BYTE/SHORT/INT, the long evaluator for LONG and the double
   *     evaluator for FLOAT/DOUBLE
   * @throws SemanticException (as {@link UDFArgumentTypeException}) when the argument count
   *     is not 1 or the argument is not one of the numeric types listed above
   */
  @Override
  public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
    if (parameters.length != 1) {
      throw new UDFArgumentTypeException(parameters.length - 1, "Only 1 parameter is accepted!");
    }

    ObjectInspector objectInspector =
        TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(parameters[0]);
    if (!ObjectInspectorUtils.compareSupported(objectInspector)) {
      throw new UDFArgumentTypeException(parameters.length - 1,
          "Cannot support comparison of map<> type or complex type containing map<>.");
    }

    // Guard the PrimitiveTypeInfo cast below: list/struct arguments must be rejected with an
    // argument-type error instead of a ClassCastException.
    if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
      throw unsupportedType(parameters[0]);
    }

    switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) {
      case BYTE:
      case SHORT:
      case INT:
        return new GenericUDAFMedianEvaluatorInt();
      case LONG:
        return new GenericUDAFMedianEvaluatorLong();
      case FLOAT:
      case DOUBLE:
        return new GenericUDAFMedianEvaluatorDouble();
      case STRING:
      case BOOLEAN:
      default:
        throw unsupportedType(parameters[0]);
    }
  }

  /** Builds the error reported for any argument type that is not a supported numeric type. */
  private static UDFArgumentTypeException unsupportedType(TypeInfo type) {
    return new UDFArgumentTypeException(0,
        "Only numeric type(int long double) arguments are accepted but "
            + type.getTypeName() + " was passed as parameter of index->1.");
  }

  /**
   * Shared median logic. Values are held in the aggregation buffer as boxed Java numbers and
   * converted to Hadoop writables only when a partial result is emitted.
   *
   * @param <T> boxed Java type the values are held as while aggregating
   */
  abstract static class AbstractMedianEvaluator<T extends Comparable<T>>
      extends GenericUDAFEvaluator {

    /** Inspector for raw input rows; set in PARTIAL1 and COMPLETE modes. */
    private PrimitiveObjectInspector inputOI;
    /** Inspector for the incoming partial struct; set in PARTIAL2 and FINAL modes. */
    private StructObjectInspector partialOI;
    private StructField listField;
    private ListObjectInspector listFieldOI;
    private PrimitiveObjectInspector listElementOI;
    /** Reused holder for the final result; created in COMPLETE and FINAL modes. */
    private DoubleWritable result;
    /** Ensures the NumberFormatException warning is logged only once per evaluator. */
    private boolean warned = false;

    /** Returns the writable inspector describing one element of the partial-result list. */
    abstract ObjectInspector writableElementInspector();

    /** Reads {@code raw} through {@code oi} and returns it as the boxed aggregation type. */
    abstract T readValue(Object raw, PrimitiveObjectInspector oi);

    /** Wraps {@code value} in the writable type described by {@link #writableElementInspector()}. */
    abstract Object toWritable(T value);

    /** Converts {@code value} to double for the final result. */
    abstract double toDouble(T value);

    /** Aggregation buffer: every non-null value seen so far for one group. */
    static class MedianNumberAgg<V> implements AggregationBuffer {
      List<V> values;
    }

    @SuppressWarnings("unchecked") // buffers are only ever created by getNewAggregationBuffer()
    private MedianNumberAgg<T> bufferOf(AggregationBuffer agg) {
      return (MedianNumberAgg<T>) agg;
    }

    /**
     * Records the inspectors needed for mode {@code m} and returns the output inspector:
     * a struct with a single field {@code "list"} for PARTIAL1/PARTIAL2, a writable double
     * otherwise.
     */
    @Override
    public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
      assert (parameters.length == 1);
      super.init(m, parameters);

      // Input side.
      if (m == Mode.PARTIAL1 || m == Mode.COMPLETE) {
        inputOI = (PrimitiveObjectInspector) parameters[0];
      } else {
        partialOI = (StructObjectInspector) parameters[0];
        listField = partialOI.getStructFieldRef("list");
        listFieldOI = (ListObjectInspector) listField.getFieldObjectInspector();
        listElementOI = (PrimitiveObjectInspector) listFieldOI.getListElementObjectInspector();
      }

      // Output side.
      if (m == Mode.PARTIAL1 || m == Mode.PARTIAL2) {
        List<ObjectInspector> fieldInspectors = new ArrayList<ObjectInspector>();
        fieldInspectors.add(
            ObjectInspectorFactory.getStandardListObjectInspector(writableElementInspector()));
        List<String> fieldNames = new ArrayList<String>();
        fieldNames.add("list");
        return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldInspectors);
      }
      result = new DoubleWritable();
      return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
    }

    @Override
    public AggregationBuffer getNewAggregationBuffer() throws HiveException {
      MedianNumberAgg<T> buffer = new MedianNumberAgg<T>();
      reset(buffer);
      return buffer;
    }

    @Override
    public void reset(AggregationBuffer agg) throws HiveException {
      bufferOf(agg).values = new ArrayList<T>();
    }

    /** Adds one input row to the buffer; null or unreadable values are skipped. */
    @Override
    public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
      assert (parameters.length == 1);
      if (parameters[0] == null) {
        return;
      }
      T value;
      try {
        value = readValue(parameters[0], inputOI);
      } catch (NullPointerException e) {
        // Defensive catch kept from the original; the row is now skipped as the message says
        // instead of being counted as 0.
        LOG.warn("got a null value, skip it");
        return;
      } catch (NumberFormatException e) {
        if (!warned) {
          warned = true;
          LOG.warn(getClass().getSimpleName() + " " + StringUtils.stringifyException(e));
          LOG.warn("ignore similar exceptions.");
        }
        return;
      }
      bufferOf(agg).values.add(value);
    }

    /** Emits the collected values as a one-field struct holding a list of writables. */
    @Override
    public Object terminatePartial(AggregationBuffer agg) throws HiveException {
      List<T> values = bufferOf(agg).values;
      List<Object> writables = new ArrayList<Object>(values.size());
      for (T value : values) {
        writables.add(toWritable(value));
      }
      return new Object[] {writables};
    }

    /** Appends the values carried by a partial result to the buffer. */
    @Override
    public void merge(AggregationBuffer agg, Object partial) throws HiveException {
      if (partial == null) {
        return;
      }
      Object listData = partialOI.getStructFieldData(partial, listField);
      List<?> elements = listFieldOI.getList(listData);
      if (elements == null) {
        return;
      }
      List<T> values = bufferOf(agg).values;
      for (Object element : elements) {
        if (element != null) {
          // Copy the value out through the element inspector: the element's concrete class
          // depends on the inspector supplied at init, and the objects may be reused by
          // whoever produced the partial, so they must not be retained.
          values.add(readValue(element, listElementOI));
        }
      }
    }

    /**
     * Sorts the collected values and returns their median.
     *
     * @return the median as a {@link DoubleWritable}, or {@code null} when no value was
     *     collected
     */
    @Override
    public Object terminate(AggregationBuffer agg) throws HiveException {
      List<T> values = bufferOf(agg).values;
      int size = values.size();
      if (size == 0) {
        return null;
      }
      Collections.sort(values);
      int mid = size / 2;
      double median;
      if (size % 2 == 1) {
        median = toDouble(values.get(mid));
      } else {
        // Convert before adding so two large int/long values cannot overflow.
        median = (toDouble(values.get(mid - 1)) + toDouble(values.get(mid))) / 2.0;
      }
      result.set(median);
      return result;
    }
  }

  /** Median evaluator for BYTE, SHORT and INT arguments. */
  public static class GenericUDAFMedianEvaluatorInt extends AbstractMedianEvaluator<Integer> {

    @Override
    ObjectInspector writableElementInspector() {
      return PrimitiveObjectInspectorFactory.writableIntObjectInspector;
    }

    @Override
    Integer readValue(Object raw, PrimitiveObjectInspector oi) {
      return Integer.valueOf(PrimitiveObjectInspectorUtils.getInt(raw, oi));
    }

    @Override
    Object toWritable(Integer value) {
      return new IntWritable(value.intValue());
    }

    @Override
    double toDouble(Integer value) {
      return value.doubleValue();
    }
  }

  /** Median evaluator for FLOAT and DOUBLE arguments. */
  public static class GenericUDAFMedianEvaluatorDouble extends AbstractMedianEvaluator<Double> {

    @Override
    ObjectInspector writableElementInspector() {
      return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
    }

    @Override
    Double readValue(Object raw, PrimitiveObjectInspector oi) {
      return Double.valueOf(PrimitiveObjectInspectorUtils.getDouble(raw, oi));
    }

    @Override
    Object toWritable(Double value) {
      return new DoubleWritable(value.doubleValue());
    }

    @Override
    double toDouble(Double value) {
      return value.doubleValue();
    }
  }

  /** Median evaluator for LONG arguments. */
  public static class GenericUDAFMedianEvaluatorLong extends AbstractMedianEvaluator<Long> {

    @Override
    ObjectInspector writableElementInspector() {
      return PrimitiveObjectInspectorFactory.writableLongObjectInspector;
    }

    @Override
    Long readValue(Object raw, PrimitiveObjectInspector oi) {
      return Long.valueOf(PrimitiveObjectInspectorUtils.getLong(raw, oi));
    }

    @Override
    Object toWritable(Long value) {
      return new LongWritable(value.longValue());
    }

    @Override
    double toDouble(Long value) {
      return value.doubleValue();
    }
  }
}
测试:
-- Register the UDAF from the packaged jar.
use datawarehouse;
add jar /home/hadoop/shengli/median.jar;
create temporary function median as 'org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMedian';

-- int input, odd number of rows
select median(id) from (
  select 7 id from dual
  union all
  select 8 id from dual
  union all
  select 1 id from dual
) a;

-- bigint input, even number of rows
select median(id) from (
  select cast(1 as bigint) id from dual
  union all
  select cast(2 as bigint) id from dual
) a;

-- double input
select median(id) from (
  select 1.0 id from dual
  union all
  select 2.3 id from dual
) a;

-- int input, odd number of rows
select median(id) from (
  select 1 id from dual
  union all
  select 2 id from dual
  union all
  select 3 id from dual
) a;

-- NULL-only input
select median(id) from (
  select null id from dual
) a;

-- ---------------------------------
-- median per group
select type, median(id) from (
  select 'a' type, 3 id from dual
  union all
  select 'a' type, -2 id from dual
  union all
  select 'a' type, 1 id from dual
  union all
  select 'a' type, 4 id from dual
  union all
  select 'b' type, 6 id from dual
  union all
  select 'b' type, 5 id from dual
  union all
  select 'b' type, 4 id from dual
) a
group by type;
关于hive中怎么使用udaf函数求中位数问题的解答就分享到这里了,希望以上内容可以对大家有一定的帮助,如果你还有很多疑惑没有解开,可以关注行业资讯频道了解更多相关知识。
中位数
问题
函数
数据
更多
帮助
解答
易行
从小到大
简单易行
两个
个数
从小
代码
位置
内容
处在
小伙
小伙伴
平均数
数据库的安全要保护哪些东西
数据库安全各自的含义是什么
生产安全数据库录入
数据库的安全性及管理
数据库安全策略包含哪些
海淀数据库安全审计系统
建立农村房屋安全信息数据库
易用的数据库客户端支持安全管理
连接数据库失败ssl安全错误
数据库的锁怎样保障安全
关闭轻松阅读服务器的应用
培训学校网络安全宣传
数据库接口开发需求
深圳网络安全与执法公务员
国家网络安全演戏
进一步加强网络安全情况的报告
皮蓬数据库
随着网络技术越来越强
网络技术基础与黑客
数据库可以识别的日期类型
数据库图片怎么看
通达信 服务器
上海天锦软件开发公司
徐玉玉案电信网络安全
云服务器数据安全保密
顶级dns服务器
文明6服务器自己搭建
智能灌溉服务器
数据库中间件更新延迟
宝信软件软件开发待遇
数据库关系模型试用类型
昆明合众软件开发有限公司
怎么用饥荒自带服务器
什么是数据库的连接数
计算机及网络技术工程师
access数据库打印
服务器加vpn转换ip
数据库笔记本电脑支架
亳州医疗软件开发公司哪家好
数据库主外键适用场景