千家信息网

php通过thrift操作hbase

发表于:2024-09-27 作者:千家信息网编辑
千家信息网最后更新 2024年09月27日,环境配置:操作系统 centos 5.8,hadoop版本 cloudera cdh4u3,hbase版本 hbase-0.90.4-cdh4u3,php版本 5.2。1. 下载并编译thrift:# wget ……
千家信息网最后更新 2024年09月27日php通过thrift操作hbase

环境配置

操作系统:centos 5.8;hadoop版本:cloudera cdh4u3;hbase版本:hbase-0.90.4-cdh4u3;php版本:5.2

1. 下载并编译thrift

# wget http://ftp.tc.edu.tw/pub/Apache/thrift/0.8.0/thrift-0.8.0.tar.gz

安装所需的依赖包

# yum install automake libtool flex bison pkgconfig gcc-c++ boost-devel libevent-devel zlib-devel python-devel ruby-devel php php-devel

# tar zxvf thrift-0.8.0.tar.gz

# cd thrift-0.8.0

# ./configure --prefix=/home/thrift --with-php-config=/usr/bin/php-config

# make && make install

2. 生成php和hbase的接口文件:

# cd /home/thrift/

# bin/thrift --gen php $HBASE_HOME/src/main/resources/org/apache/hadoop/hbase/thrift/Hbase.thrift

# cd gen-php/Hbase

# ls

Hbase.php Hbase_types.php

3. 把PHP客户端需要的包及刚才生成的接口文件复制出来供php程序调用:

# mkdir -p /var/www/html/hbasethrift/libs

(/var/www/html为apache的web主目录)

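# cp -a /home/soft/thrift-0.8.0/lib/php/src/. /var/www/html/hbasethrift/libs/

(这里假设第1步的thrift源码解压在/home/soft下;由于libs目录已经存在,用"src/."只复制src目录下的内容,使Thrift.php直接位于libs目录下,而不是libs/src下)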

# mkdir -p /var/www/html/hbasethrift/libs/packages

# cp -a /home/thrift/gen-php/Hbase /var/www/html/hbasethrift/libs/packages

4. 启动hbase thrift server,测试php连接hbase

# $HBASE_HOME/bin/hbase-daemon.sh start thrift

hbase thrift 默认监听端口为9090

测试php连接与操作hbase代码

# vi /var/www/html/hbasethrift/hbasethrift.php

  <?php
  /**
   * Demo script: drive HBase from PHP through the HBase Thrift gateway.
   *
   * Steps, in order: list tables, create "table1", print the column families
   * of "test" and "table1", write and read back one cell, dump one row, then
   * scan a row-key range of "zTest".
   *
   * Written for PHP 5.2, so it deliberately sticks to array() syntax and does
   * not use try/finally.
   */

  // Root directory of the Thrift PHP library; every require below is resolved
  // against it, so it must be the directory that holds Thrift.php and
  // packages/Hbase.
  $GLOBALS['THRIFT_ROOT'] = '/var/www/html/hbasethrift/libs';

  require_once( $GLOBALS['THRIFT_ROOT'].'/Thrift.php' );
  require_once( $GLOBALS['THRIFT_ROOT'].'/transport/TSocket.php' );
  require_once( $GLOBALS['THRIFT_ROOT'].'/transport/TBufferedTransport.php' );
  require_once( $GLOBALS['THRIFT_ROOT'].'/protocol/TBinaryProtocol.php' );
  require_once( $GLOBALS['THRIFT_ROOT'].'/packages/Hbase/Hbase.php' );

  $socket = new TSocket( 'localhost', 9090 );
  $socket->setSendTimeout( 10000 ); // Ten seconds (too long for production, but this is just a demo ;)
  $socket->setRecvTimeout( 20000 ); // Twenty seconds
  $transport = new TBufferedTransport( $socket );
  $protocol = new TBinaryProtocol( $transport );
  $client = new HbaseClient( $protocol );

  $transport->open();

  // Everything after open() runs inside try/catch so the connection is closed
  // even when a call fails; the exception is re-thrown unchanged.
  try {
      // ---- List existing tables ----
      echo nl2br( "listing tables...\n" );
      $tables = $client->getTableNames();
      sort( $tables );
      foreach ( $tables as $name ) {
          echo nl2br( " found: {$name}\n" );
      }

      // ---- Create table1 with two column families ----
      $columns = array(
          new ColumnDescriptor( array(
              'name' => 'entry:',
              'maxVersions' => 10
          ) ),
          new ColumnDescriptor( array(
              'name' => 'unused:'
          ) )
      );
      $t = "table1";
      echo( "creating table: {$t}\n" );
      try {
          $client->createTable( $t, $columns );
      } catch ( AlreadyExists $ae ) {
          // Re-running the demo is expected; an existing table is only a warning.
          echo( "WARN: {$ae->message}\n" );
      }

      // ---- Print the column families of "test" and "table1" ----
      $t = "test";
      echo( "column families in {$t}:\n" );
      $descriptors = $client->getColumnDescriptors( $t );
      asort( $descriptors );
      foreach ( $descriptors as $col ) {
          echo( " column: {$col->name}, maxVer: {$col->maxVersions}\n" );
      }

      $t = "table1";
      echo( "column families in {$t}:\n" );
      $descriptors = $client->getColumnDescriptors( $t );
      asort( $descriptors );
      foreach ( $descriptors as $col ) {
          echo( " column: {$col->name}, maxVer: {$col->maxVersions}\n" );
      }

      // ---- Write one cell ----
      $t = "table1";
      $row = "row_name";
      $valid = "foobar-\xE7\x94\x9F\xE3\x83\x93";
      $mutations = array(
          new Mutation( array(
              'column' => 'entry:foo',
              'value' => $valid
          ) ),
      );

      /*
       * Batch variant for many rows. The original author reports roughly
       * 5000 rows/s for small records when committing 200 rows per call.
       *
       *   $rows = array(
       *       'timestamp' => $timestamp,
       *       'columns'   => array('txt:col1' => $col1, 'txt:col2' => $col2, 'txt:col3' => $col3),
       *   );
       *   $records = array(rowkey => $rows, ...);
       *
       *   $batchrecord = array();
       *   foreach ($records as $rowkey => $rows) {
       *       $timestamp = $rows['timestamp'];
       *       $columns = $rows['columns'];
       *       // build the mutations of one row
       *       $record = array();
       *       foreach ($columns as $column => $value) {
       *           $col = new Mutation(array('column' => $column, 'value' => $value));
       *           array_push($record, $col);
       *       }
       *       // append the row to the batch
       *       $batchTmp = new BatchMutation(array('row' => $rowkey, 'mutations' => $record));
       *       array_push($batchrecord, $batchTmp);
       *   }
       *   $ret = $client->mutateRows('test', $batchrecord);
       */
      $client->mutateRow( $t, $row, $mutations );

      // ---- Read the cell back ----
      $table_name = "table1";
      $row_name = 'row_name';
      $fam_col_name = 'entry:foo';
      $arr = $client->get( $table_name, $row_name, $fam_col_name );
      // $arr is iterated as a list of cells; each $v exposes ->value and ->timestamp.
      foreach ( $arr as $k => $v ) {
          echo ("value = {$v->value} , <br> ");
          echo ("timestamp = {$v->timestamp} <br>");
      }

      // ---- Dump the whole row ----
      $table_name = "table1";
      $row_name = "row_name";
      $arr = $client->getRow( $table_name, $row_name );
      // The index $k is not used; each element is dumped as-is.
      foreach ( $arr as $k => $TRowResult ) {
          var_dump( $TRowResult );
      }

      // ---- Range scan: scannerOpenWithStop($tableName, $startRow, $stopRow, $columns) ----
      $table_name = 'zTest';
      $startRow = "9-9-20120627-";
      $stopRow = "9-9-20120627_";
      $columns = array( 'info:' );
      $scanner = $client->scannerOpenWithStop( $table_name, $startRow, $stopRow, $columns );
      while ( true ) {
          $record = $client->scannerGet( $scanner );
          // Loose comparison on purpose: it is true for null and for an empty
          // array alike (presumably the latter is what an exhausted scanner
          // returns -- confirm against the Thrift API docs).
          if ( $record == NULL ) {
              break;
          }
          foreach ( $record as $TRowResult ) {
              $row = $TRowResult->row;
              $column = $TRowResult->columns;
              foreach ( $column as $family_column => $Tcell ) {
                  echo("$family_column={$Tcell->value}<br>");
                  echo("timestamp is $Tcell->timestamp");
              }
          }
      }
      // Release the server-side scanner once the range is exhausted.
      $client->scannerClose( $scanner );
  } catch ( Exception $e ) {
      $transport->close();
      throw $e;
  }

  $transport->close();
  ?>

通过浏览器访问 http://localhost/hbasethrift/hbasethrift.php ,如果页面显示出hbase中的表名以及新建的表table1,说明连接成功。

hbase thrift api 参考:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/thrift/doc-files/index.html


参考:http://www.banping.com/2011/07/08/hbase-thrift-php/

0