千家信息网

Spark 访问 HBase

发表于:2025-01-19 作者:千家信息网编辑
千家信息网最后更新 2025年01月19日,import org.apache.hadoop.hbase.{HBaseConfiguration, HTableDescriptor}import org.apache.hadoop.hbase.
千家信息网最后更新 2025年01月19日 Spark 访问 HBase


// spark-shell walkthrough: reading HBase tables through TableInputFormat.
// Expects the shell-provided SparkContext `sc` and the HBase client jars on the classpath.

import java.nio.ByteBuffer

import scala.collection.JavaConverters._

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.hbase.{HBaseConfiguration, HTableDescriptor}
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.rdd.{NewHadoopRDD, RDD}

/**
 * Builds a fresh HBase configuration for scanning one table.
 *
 * @param table       value stored under `TableInputFormat.INPUT_TABLE`
 * @param scanColumns optional value for `TableInputFormat.SCAN_COLUMNS`, e.g. `"lf:app1"`
 * @return a new configuration; nothing is shared between calls
 */
def hbaseConf(table: String, scanColumns: Option[String] = None): Configuration = {
  val tableConf = HBaseConfiguration.create()
  tableConf.set(TableInputFormat.INPUT_TABLE, table)
  scanColumns.foreach(columns => tableConf.set(TableInputFormat.SCAN_COLUMNS, columns))
  tableConf
}

/**
 * Opens an RDD of (row key, Result) pairs for the scan described by `scanConf`.
 *
 * Spark's documentation for `newAPIHadoopRDD` warns that the configuration is broadcast and
 * must not be modified afterwards. The original script changed `conf` after creating the RDD
 * and left the re-creation commented out; here every distinct scan gets its own
 * configuration and its own RDD instead.
 */
def hbaseResults(scanConf: Configuration): RDD[(ImmutableBytesWritable, Result)] =
  sc.newAPIHadoopRDD(
    scanConf,
    classOf[TableInputFormat],
    classOf[ImmutableBytesWritable],
    classOf[Result])

// NOTE: the closures below deliberately reference only literals, never driver-side vals or
// defs, so that nothing from the shell session (such as a Hadoop Configuration) has to be
// serialized along with the tasks.

// ---------------------------------------------------------------------------
// Table "tmp", column cf:val
// ---------------------------------------------------------------------------
val tmpRDD = hbaseResults(hbaseConf("tmp"))
tmpRDD.count()

// (row key, newest cf:val) as raw bytes. Result.getValue returns the latest version of the
// column, or null when the row does not have it; such rows are skipped instead of failing
// in reduceLeft on an empty list.
tmpRDD.flatMap { case (_, result) =>
  Option(result.getValue(Bytes.toBytes("cf"), Bytes.toBytes("val")))
    .map(latest => (result.getRow, latest))
}.take(10)

// Same query, with row key and value decoded as text.
tmpRDD.flatMap { case (_, result) =>
  Option(result.getValue(Bytes.toBytes("cf"), Bytes.toBytes("val")))
    .map(latest => (Bytes.toString(result.getRow), Bytes.toString(latest)))
}.take(10)

// ---------------------------------------------------------------------------
// Table "test1", column lf:app1
// ---------------------------------------------------------------------------
val conf = hbaseConf("test1")
val hBaseRDD = hbaseResults(conf)

// (row key, value bytes rendered as concatenated decimal numbers) - a raw dump of the cell.
// flatMap drops rows without lf:app1; the original if-without-else produced `()` entries
// and widened the element type to Any.
hBaseRDD.flatMap { case (_, result) =>
  Option(result.getValue(Bytes.toBytes("lf"), Bytes.toBytes("app1")))
    .map(latest => (Bytes.toString(result.getRow), latest.map(_.toInt).mkString))
}.take(10)

// (row key, value decoded as a big-endian Long). getLong needs at least 8 bytes in the cell.
hBaseRDD.flatMap { case (_, result) =>
  Option(result.getValue(Bytes.toBytes("lf"), Bytes.toBytes("app1")))
    .map(latest => (Bytes.toString(result.getRow), ByteBuffer.wrap(latest).getLong))
}.take(10)

// Restrict the scan itself to lf:app1, then read the first (only) column of each Result.
//hbaseConf("test1").set(TableInputFormat.SCAN_COLUMN_FAMILY, "lf")
val app1RDD = hbaseResults(hbaseConf("test1", Some("lf:app1")))
app1RDD.flatMap { case (_, result) =>
  Option(result.value)
    .map(first => (Bytes.toString(result.getRow), ByteBuffer.wrap(first).getLong))
}.take(10)

// ---------------------------------------------------------------------------
// Group "test1" row keys of the form "<prefix>|<suffix>" by prefix
// ---------------------------------------------------------------------------
val rows = hBaseRDD.map { case (_, result) => Bytes.toString(result.getRow) }

// limit = -1 keeps trailing empty fields, so split never returns an empty array (a key such
// as "|" made the original `r(0)` throw). Keys without a separator map to (key, "").
rows.map { key =>
  val parts = key.split("\\|", -1)
  (parts(0), if (parts.length > 1) parts(1) else "")
}.groupByKey.take(10)


0