千家信息网

hive中怎么使用udaf函数求中位数

发表于:2024-11-19 作者:千家信息网编辑
千家信息网最后更新 2024年11月19日,hive中怎么使用udaf函数求中位数,针对这个问题,这篇文章详细介绍了相对应的分析和解答,希望可以帮助更多想解决这个问题的小伙伴找到更简单易行的方法。看下中位数定义:MEDIAN 中位数(一组数据按
千家信息网最后更新 2024年11月19日hive中怎么使用udaf函数求中位数

hive中怎么使用udaf函数求中位数,针对这个问题,这篇文章详细介绍了相对应的分析和解答,希望可以帮助更多想解决这个问题的小伙伴找到更简单易行的方法。

看下中位数定义:

MEDIAN 中位数(一组数据按从小到大的顺序依次排列,处在中间位置的一个数或最中间两个数据的平均数)
写成genericUDAF的形式

1 2 3 4 中位数 (2+3)/2=2.5
1 2 3 中位数 2

代码如下

package org.apache.hadoop.hive.ql.udf.generic;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.util.StringUtils;

/**
 * Hive generic UDAF that computes the median of a numeric column.
 *
 * <p>The median is the middle value of the sorted inputs, or the arithmetic mean of the two
 * middle values when the number of inputs is even. NULL inputs are ignored; a group with no
 * non-NULL input yields NULL.
 *
 * <p>Every evaluator collects all values of a group in memory, ships them as a single-field
 * struct ({@code list}) between the partial and final stages, and sorts them in
 * {@code terminate}.
 */
@Description(name = "median",
    value = "_FUNC_(x) return the median number of a number array. \neg: median(x)")
public class GenericUDAFMedian extends AbstractGenericUDAFResolver {

  static final Log LOG = LogFactory.getLog(GenericUDAFMedian.class.getName());

  /**
   * Picks the evaluator matching the argument type.
   *
   * @param parameters type information of the call arguments; exactly one is accepted
   * @return an int, long or double based median evaluator
   * @throws SemanticException (as {@link UDFArgumentTypeException}) when the argument count is
   *     not 1 or the argument is not a supported numeric primitive
   */
  @Override
  public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
    if (parameters.length != 1) {
      throw new UDFArgumentTypeException(parameters.length - 1, "Only 1 parameter is accepted!");
    }

    ObjectInspector objectInspector =
        TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(parameters[0]);
    if (!ObjectInspectorUtils.compareSupported(objectInspector)) {
      throw new UDFArgumentTypeException(parameters.length - 1,
          "Cannot support comparison of map<> type or complex type containing map<>.");
    }

    // Reject list/struct/union arguments up front: the PrimitiveTypeInfo cast below would
    // otherwise fail with a ClassCastException instead of a proper argument error.
    if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
      throw new UDFArgumentTypeException(0,
          "Only numeric type(int long double) arguments are accepted but "
          + parameters[0].getTypeName() + " was passed as parameter of index->1.");
    }

    switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) {
      case BYTE:
      case SHORT:
      case INT:
        return new GenericUDAFMedianEvaluatorInt();
      case LONG:
        return new GenericUDAFMedianEvaluatorLong();
      case FLOAT:
      case DOUBLE:
        return new GenericUDAFMedianEvaluatorDouble();
      case STRING:
      case BOOLEAN:
      default:
        throw new UDFArgumentTypeException(0,
            "Only numeric type(int long double) arguments are accepted but "
            + parameters[0].getTypeName() + " was passed as parameter of index->1.");
    }
  }

  /** Median evaluator for BYTE, SHORT and INT inputs; values are buffered as IntWritable. */
  public static class GenericUDAFMedianEvaluatorInt extends GenericUDAFEvaluator {

    private DoubleWritable result = new DoubleWritable();
    PrimitiveObjectInspector inputOI;
    StructObjectInspector structOI;
    StandardListObjectInspector listOI;
    StructField listField;
    Object[] partialResult;
    ListObjectInspector listFieldOI;
    // Inspector for the elements of the incoming partial list; set in PARTIAL2/FINAL only.
    PrimitiveObjectInspector listElementOI;
    boolean warned = false;

    /**
     * Wires up input and output inspectors for the given mode.
     *
     * @return a struct inspector with one {@code list} field for partial modes, otherwise the
     *     writable double inspector for the final median
     */
    @Override
    public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
      assert (parameters.length == 1);
      super.init(m, parameters);

      listOI = ObjectInspectorFactory.getStandardListObjectInspector(
          PrimitiveObjectInspectorFactory.writableIntObjectInspector);

      // init input: raw column values in PARTIAL1/COMPLETE, partial struct otherwise
      if (m == Mode.PARTIAL1 || m == Mode.COMPLETE) {
        inputOI = (PrimitiveObjectInspector) parameters[0];
      } else {
        structOI = (StructObjectInspector) parameters[0];
        listField = structOI.getStructFieldRef("list");
        listFieldOI = (ListObjectInspector) listField.getFieldObjectInspector();
        listElementOI = (PrimitiveObjectInspector) listFieldOI.getListElementObjectInspector();
      }

      // init output
      if (m == Mode.PARTIAL1 || m == Mode.PARTIAL2) {
        List<ObjectInspector> foi = new ArrayList<ObjectInspector>();
        foi.add(listOI);
        List<String> fname = new ArrayList<String>();
        fname.add("list");
        partialResult = new Object[1];
        partialResult[0] = new ArrayList<IntWritable>();
        return ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi);
      }
      return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
    }

    /** Aggregation buffer holding every value seen so far for one group. */
    static class MedianNumberAgg implements AggregationBuffer {
      List<IntWritable> aggIntegerList;
    }

    @Override
    public AggregationBuffer getNewAggregationBuffer() throws HiveException {
      MedianNumberAgg resultAgg = new MedianNumberAgg();
      reset(resultAgg);
      return resultAgg;
    }

    @Override
    public void reset(AggregationBuffer agg) throws HiveException {
      ((MedianNumberAgg) agg).aggIntegerList = new ArrayList<IntWritable>();
    }

    /** Appends one non-NULL input value to the buffer; unreadable values are skipped. */
    @Override
    public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
      assert (parameters.length == 1);
      if (parameters[0] == null) {
        return;
      }
      MedianNumberAgg medianNumberAgg = (MedianNumberAgg) agg;
      int val;
      try {
        val = PrimitiveObjectInspectorUtils.getInt(parameters[0], inputOI);
      } catch (NullPointerException e) {
        LOG.warn("got a null value, skip it");
        return; // really skip it: adding a default 0 would skew the median
      } catch (NumberFormatException e) {
        if (!warned) {
          warned = true;
          LOG.warn(getClass().getSimpleName() + " " + StringUtils.stringifyException(e));
          LOG.warn("ignore similar exceptions.");
        }
        return;
      }
      medianNumberAgg.aggIntegerList.add(new IntWritable(val));
    }

    /**
     * Sorts the buffered values and returns their median.
     *
     * @return the median as a DoubleWritable, or {@code null} when no value was collected
     */
    @SuppressWarnings("unchecked")
    @Override
    public Object terminate(AggregationBuffer agg) throws HiveException {
      List<IntWritable> values = ((MedianNumberAgg) agg).aggIntegerList;
      int size = values.size();
      if (size == 0) {
        return null; // empty or all-NULL group
      }
      Collections.sort(values);
      int midIndex = size / 2;
      double rs;
      if (size % 2 == 1) {
        rs = values.get(midIndex).get();
      } else {
        // Promote to double before adding so the sum of two ints cannot overflow.
        rs = ((double) values.get(midIndex - 1).get() + (double) values.get(midIndex).get())
            / 2.0;
      }
      result.set(rs);
      return result;
    }

    /** Returns a snapshot of the buffered values wrapped as the one-field partial struct. */
    @Override
    public Object terminatePartial(AggregationBuffer agg) throws HiveException {
      MedianNumberAgg medianNumberAgg = (MedianNumberAgg) agg;
      partialResult[0] = new ArrayList<IntWritable>(medianNumberAgg.aggIntegerList);
      return partialResult;
    }

    /** Adds every value of a partial result to the buffer. */
    @Override
    public void merge(AggregationBuffer agg, Object partial) throws HiveException {
      if (partial == null) {
        return;
      }
      MedianNumberAgg medianNumberAgg = (MedianNumberAgg) agg;
      Object partialObject = structOI.getStructFieldData(partial, listField);
      List<?> partialValues = listFieldOI.getList(partialObject);
      if (partialValues == null) {
        return;
      }
      for (Object element : partialValues) {
        if (element != null) {
          // Read through the element inspector and copy, rather than casting and keeping a
          // reference to an object owned by the deserializer.
          medianNumberAgg.aggIntegerList.add(
              new IntWritable(PrimitiveObjectInspectorUtils.getInt(element, listElementOI)));
        }
      }
    }
  }

  /** Median evaluator for FLOAT and DOUBLE inputs; values are buffered as DoubleWritable. */
  public static class GenericUDAFMedianEvaluatorDouble extends GenericUDAFEvaluator {

    private DoubleWritable result = new DoubleWritable();
    PrimitiveObjectInspector inputOI;
    StructObjectInspector structOI;
    StandardListObjectInspector listOI;
    StructField listField;
    Object[] partialResult;
    ListObjectInspector listFieldOI;
    // Inspector for the elements of the incoming partial list; set in PARTIAL2/FINAL only.
    PrimitiveObjectInspector listElementOI;
    boolean warned = false;

    /**
     * Wires up input and output inspectors for the given mode.
     *
     * @return a struct inspector with one {@code list} field for partial modes, otherwise the
     *     writable double inspector for the final median
     */
    @Override
    public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
      assert (parameters.length == 1);
      super.init(m, parameters);

      listOI = ObjectInspectorFactory.getStandardListObjectInspector(
          PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);

      // init input: raw column values in PARTIAL1/COMPLETE, partial struct otherwise
      if (m == Mode.PARTIAL1 || m == Mode.COMPLETE) {
        inputOI = (PrimitiveObjectInspector) parameters[0];
      } else {
        structOI = (StructObjectInspector) parameters[0];
        listField = structOI.getStructFieldRef("list");
        listFieldOI = (ListObjectInspector) listField.getFieldObjectInspector();
        listElementOI = (PrimitiveObjectInspector) listFieldOI.getListElementObjectInspector();
      }

      // init output
      if (m == Mode.PARTIAL1 || m == Mode.PARTIAL2) {
        List<ObjectInspector> foi = new ArrayList<ObjectInspector>();
        foi.add(listOI);
        List<String> fname = new ArrayList<String>();
        fname.add("list");
        partialResult = new Object[1];
        return ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi);
      }
      return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
    }

    /** Aggregation buffer holding every value seen so far for one group. */
    static class MedianNumberAgg implements AggregationBuffer {
      List<DoubleWritable> aggIntegerList;
    }

    @Override
    public AggregationBuffer getNewAggregationBuffer() throws HiveException {
      MedianNumberAgg resultAgg = new MedianNumberAgg();
      reset(resultAgg);
      return resultAgg;
    }

    @Override
    public void reset(AggregationBuffer agg) throws HiveException {
      ((MedianNumberAgg) agg).aggIntegerList = new ArrayList<DoubleWritable>();
    }

    /** Appends one non-NULL input value to the buffer; unreadable values are skipped. */
    @Override
    public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
      assert (parameters.length == 1);
      if (parameters[0] == null) {
        return;
      }
      MedianNumberAgg medianNumberAgg = (MedianNumberAgg) agg;
      double val;
      try {
        val = PrimitiveObjectInspectorUtils.getDouble(parameters[0], inputOI);
      } catch (NullPointerException e) {
        LOG.warn("got a null value, skip it");
        return; // really skip it: adding a default 0.0 would skew the median
      } catch (NumberFormatException e) {
        if (!warned) {
          warned = true;
          LOG.warn(getClass().getSimpleName() + " " + StringUtils.stringifyException(e));
          LOG.warn("ignore similar exceptions.");
        }
        return;
      }
      medianNumberAgg.aggIntegerList.add(new DoubleWritable(val));
    }

    /**
     * Sorts the buffered values and returns their median.
     *
     * @return the median as a DoubleWritable, or {@code null} when no value was collected
     */
    @SuppressWarnings("unchecked")
    @Override
    public Object terminate(AggregationBuffer agg) throws HiveException {
      List<DoubleWritable> values = ((MedianNumberAgg) agg).aggIntegerList;
      int size = values.size();
      if (size == 0) {
        return null; // empty or all-NULL group
      }
      Collections.sort(values);
      int midIndex = size / 2;
      double rs;
      if (size % 2 == 1) {
        rs = values.get(midIndex).get();
      } else {
        rs = (values.get(midIndex - 1).get() + values.get(midIndex).get()) / 2.0;
      }
      result.set(rs);
      return result;
    }

    /** Returns a snapshot of the buffered values wrapped as the one-field partial struct. */
    @Override
    public Object terminatePartial(AggregationBuffer agg) throws HiveException {
      MedianNumberAgg medianNumberAgg = (MedianNumberAgg) agg;
      partialResult[0] = new ArrayList<DoubleWritable>(medianNumberAgg.aggIntegerList);
      return partialResult;
    }

    /** Adds every value of a partial result to the buffer. */
    @Override
    public void merge(AggregationBuffer agg, Object partial) throws HiveException {
      if (partial == null) {
        return;
      }
      MedianNumberAgg medianNumberAgg = (MedianNumberAgg) agg;
      Object partialObject = structOI.getStructFieldData(partial, listField);
      List<?> partialValues = listFieldOI.getList(partialObject);
      if (partialValues == null) {
        return;
      }
      for (Object element : partialValues) {
        if (element != null) {
          // Read through the element inspector and copy, rather than casting and keeping a
          // reference to an object owned by the deserializer.
          medianNumberAgg.aggIntegerList.add(
              new DoubleWritable(PrimitiveObjectInspectorUtils.getDouble(element, listElementOI)));
        }
      }
    }
  }

  /** Median evaluator for LONG (bigint) inputs; values are buffered as LongWritable. */
  public static class GenericUDAFMedianEvaluatorLong extends GenericUDAFEvaluator {

    private DoubleWritable result = new DoubleWritable();
    PrimitiveObjectInspector inputOI;
    StructObjectInspector structOI;
    StandardListObjectInspector listOI;
    StructField listField;
    Object[] partialResult;
    ListObjectInspector listFieldOI;
    // Inspector for the elements of the incoming partial list; set in PARTIAL2/FINAL only.
    PrimitiveObjectInspector listElementOI;
    boolean warned = false;

    /**
     * Wires up input and output inspectors for the given mode.
     *
     * @return a struct inspector with one {@code list} field for partial modes, otherwise the
     *     writable double inspector for the final median
     */
    @Override
    public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
      assert (parameters.length == 1);
      super.init(m, parameters);

      listOI = ObjectInspectorFactory.getStandardListObjectInspector(
          PrimitiveObjectInspectorFactory.writableLongObjectInspector);

      // init input: raw column values in PARTIAL1/COMPLETE, partial struct otherwise
      if (m == Mode.PARTIAL1 || m == Mode.COMPLETE) {
        inputOI = (PrimitiveObjectInspector) parameters[0];
      } else {
        structOI = (StructObjectInspector) parameters[0];
        listField = structOI.getStructFieldRef("list");
        listFieldOI = (ListObjectInspector) listField.getFieldObjectInspector();
        listElementOI = (PrimitiveObjectInspector) listFieldOI.getListElementObjectInspector();
      }

      // init output
      if (m == Mode.PARTIAL1 || m == Mode.PARTIAL2) {
        List<ObjectInspector> foi = new ArrayList<ObjectInspector>();
        foi.add(listOI);
        List<String> fname = new ArrayList<String>();
        fname.add("list");
        partialResult = new Object[1];
        partialResult[0] = new ArrayList<LongWritable>();
        return ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi);
      }
      return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
    }

    /** Aggregation buffer holding every value seen so far for one group. */
    static class MedianNumberAgg implements AggregationBuffer {
      List<LongWritable> aggIntegerList;
    }

    @Override
    public AggregationBuffer getNewAggregationBuffer() throws HiveException {
      MedianNumberAgg resultAgg = new MedianNumberAgg();
      reset(resultAgg);
      return resultAgg;
    }

    @Override
    public void reset(AggregationBuffer agg) throws HiveException {
      ((MedianNumberAgg) agg).aggIntegerList = new ArrayList<LongWritable>();
    }

    /** Appends one non-NULL input value to the buffer; unreadable values are skipped. */
    @Override
    public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
      assert (parameters.length == 1);
      if (parameters[0] == null) {
        return;
      }
      MedianNumberAgg medianNumberAgg = (MedianNumberAgg) agg;
      long val;
      try {
        val = PrimitiveObjectInspectorUtils.getLong(parameters[0], inputOI);
      } catch (NullPointerException e) {
        LOG.warn("got a null value, skip it");
        return; // really skip it: adding a default 0 would skew the median
      } catch (NumberFormatException e) {
        if (!warned) {
          warned = true;
          LOG.warn(getClass().getSimpleName() + " " + StringUtils.stringifyException(e));
          LOG.warn("ignore similar exceptions.");
        }
        return;
      }
      medianNumberAgg.aggIntegerList.add(new LongWritable(val));
    }

    /**
     * Sorts the buffered values and returns their median.
     *
     * @return the median as a DoubleWritable, or {@code null} when no value was collected
     */
    @SuppressWarnings("unchecked")
    @Override
    public Object terminate(AggregationBuffer agg) throws HiveException {
      List<LongWritable> values = ((MedianNumberAgg) agg).aggIntegerList;
      int size = values.size();
      if (size == 0) {
        return null; // empty or all-NULL group
      }
      Collections.sort(values);
      int midIndex = size / 2;
      double rs;
      if (size % 2 == 1) {
        rs = (double) values.get(midIndex).get();
      } else {
        // Promote to double before adding so the sum of two longs cannot overflow.
        rs = ((double) values.get(midIndex - 1).get() + (double) values.get(midIndex).get())
            / 2.0;
      }
      result.set(rs);
      return result;
    }

    /** Returns a snapshot of the buffered values wrapped as the one-field partial struct. */
    @Override
    public Object terminatePartial(AggregationBuffer agg) throws HiveException {
      MedianNumberAgg medianNumberAgg = (MedianNumberAgg) agg;
      partialResult[0] = new ArrayList<LongWritable>(medianNumberAgg.aggIntegerList);
      return partialResult;
    }

    /** Adds every value of a partial result to the buffer. */
    @Override
    public void merge(AggregationBuffer agg, Object partial) throws HiveException {
      if (partial == null) {
        return;
      }
      MedianNumberAgg medianNumberAgg = (MedianNumberAgg) agg;
      Object partialObject = structOI.getStructFieldData(partial, listField);
      List<?> partialValues = listFieldOI.getList(partialObject);
      if (partialValues == null) {
        return;
      }
      for (Object element : partialValues) {
        if (element != null) {
          // Read through the element inspector and copy, rather than casting and keeping a
          // reference to an object owned by the deserializer.
          medianNumberAgg.aggIntegerList.add(
              new LongWritable(PrimitiveObjectInspectorUtils.getLong(element, listElementOI)));
        }
      }
    }
  }
}

测试:

-- Register the UDAF for this session.
use datawarehouse;
add jar /home/hadoop/shengli/median.jar;
create temporary function median as 'org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMedian';

-- int input, odd number of rows
select median(id) from (
  select 7 id from dual
  union all
  select 8 id from dual
  union all
  select 1 id from dual
) a;

-- bigint input, even number of rows
select median(id) from (
  select cast(1 as bigint) id from dual
  union all
  select cast(2 as bigint) id from dual
) a;

-- double input, even number of rows
select median(id) from (
  select 1.0 id from dual
  union all
  select 2.3 id from dual
) a;

-- int input, odd number of rows
select median(id) from (
  select 1 id from dual
  union all
  select 2 id from dual
  union all
  select 3 id from dual
) a;

-- NULL-only input
select median(id) from (
  select null id from dual
) a;

-- ---------------------------------
-- median per group
select type, median(id) from (
  select 'a' type, 3 id from dual
  union all
  select 'a' type, -2 id from dual
  union all
  select 'a' type, 1 id from dual
  union all
  select 'a' type, 4 id from dual
  union all
  select 'b' type, 6 id from dual
  union all
  select 'b' type, 5 id from dual
  union all
  select 'b' type, 4 id from dual
) a
group by type;

关于hive中怎么使用udaf函数求中位数问题的解答就分享到这里了,希望以上内容可以对大家有一定的帮助,如果你还有很多疑惑没有解开,可以关注行业资讯频道了解更多相关知识。

0