Apache Hawq功能测试脚本
发表于:2025-01-25 作者:千家信息网编辑
千家信息网最后更新 2025年01月25日,一. TPC-H编译1.下载工具TPC-H下载地址2. 拷贝生成makefile,并修改makefile/opt/tpc-h-v2.17.0/dbgencp makefile.suite makefi
千家信息网最后更新 2025年01月25日Apache Hawq功能测试脚本
一. TPC-H编译
1.下载工具TPC-H
下载地址
2. 拷贝生成makefile,并修改makefile
/opt/tpc-h-v2.17.0/dbgencp makefile.suite makefile
修改makefile
3.编译
make -f makefile
4.生成数据
dbgen -v -U 1 -s 1
./dbgen
5.创建数据库以及相关表格语句
--create database tpch;\c tpch;--1. regiondrop table if exists region;create table region(r_regionkey integer,r_name char(25),r_comment varchar(152),r_extra char(1))with(appendonly=true,orientation=parquet,compresstype=snappy)distributed by(r_regionkey);--2. nationdrop table if exists nation;create table nation(n_nationkey integer,n_name char(25),n_regionkey integer,n_comment varchar(152),n_extra char(1))with(appendonly=true,orientation=parquet,compresstype=snappy)distributed by(n_nationkey);--3.partdrop table if exists part;create table part(p_partkey bigint,p_name varchar(55),p_mfgr char(25),p_brand char(10),p_type varchar(25),p_size integer,p_container varchar(10),p_retailprice decimal,p_comment varchar(117),p_extra char(1))with(appendonly=true,orientation=parquet,compresstype=snappy)distributed by (p_partkey);--4.supplierdrop table if exists supplier;create table supplier(s_suppkey bigint,s_name char(25),s_address varchar(40),s_nationkey int,s_phone char(15),s_acctbal decimal,s_comment varchar(101),r_extra char(1))with(appendonly=true,orientation=parquet,compresstype=snappy)distributed by (s_suppkey);--5.partsuppdrop table if exists partsupp;create table partsupp(ps_partkey bigint,ps_suppkey bigint,ps_availqty integer,ps_supplycost decimal,ps_comment varchar(199),ps_extra char(1))with(appendonly=true,orientation=parquet,compresstype=snappy)distributed by(ps_partkey);#6.customerdrop table if exists customer;create table customer(c_custkey bigint,c_name char(25),c_address char(40),c_nationkey integer,c_phone char(15),c_acctbal decimal,c_mktsegment char(10),c_comment varchar(117),c_extra char(1))with(appendonly=true,orientation=parquet,compresstype=snappy)distributed by (c_custkey);--7.ordersdrop table if exists orders;create table orders(o_orderkey bigint,o_custkey bigint,o_orderstatus char(1),o_totalprice decimal,o_orderdate date,o_orderpriority char(15),o_clerk char(15),o_shippriority integer,o_comment varchar(117),o_extra char(1))with(appendonly=true,orientation=parquet,compresstype=snappy)distributed by (o_orderkey );--8.lineitemdrop table if exists lineitem;create table lineitem(l_orderkey bigint,l_partkey bigint,l_suppkey bigint,l_linenumber integer,l_quantity decimal,l_extendedprice decimal,l_discount decimal,l_tax decimal,l_returnflag char(1),l_linestatus char(1),l_shipdate date,l_commitdate date,l_receiptdate date,l_shipinstruct char(25),l_shipmode char(10),l_comment varchar(117),l_extra char(1))with(appendonly=true,orientation=parquet,compresstype=snappy)distributed by (l_linenumber);
6.查询语句在queries目录下 22个
二、创建表空间
1. 创建文件空间配置文件,在master节点上执行
$hawq filespace -o tpc_h_config
文件内容如下:
filespace:fs_tpc_hfsreplica:3dfs_url::mycluster/hawq_fs/fs_tpc_h
2. 创建HDFS目录
$hdfs dfs -mkdir /hawq_fs$hdfs dfs -chown gpadmin:gpadmin /hawq_fs$hdfs dfs -ls /
3.创建文件空间
$hawq filespace -c tpc_h_config
4.创建表空间,psql
create tablespace ts_tpc_h filespace fs_tpc_h;
5.删除文件空间和表空间
表空间拥有者可以删除,且不能有其他数据对象(如数据库,数据表)使用当前表空间,不能有表空间使用当前文件空间,才可以删除。
6. 查看当前所有表空间
SELECT spcname AS tblspc, fsname AS filespc, fsedbid AS seg_dbid, fselocation AS datadir FROM pg_tablespace pgts, pg_filespace pgfs, pg_filespace_entry pgfse WHERE pgts.spcfsoid=pgfse.fsefsoid AND pgfse.fsefsoid=pgfs.oid ORDER BY tblspc, seg_dbid;
三、创建数据库和Schema
1. 创建数据库
create database tpc_h with template template0 tablespace ts_tpc_h;
\c tpc_h;
2.创建schema
create schema extschema;
3. 查看当前schema,函数current_schema()
select current_schema();
4. 查看当前Schema的搜索路径
show search_path;
5. 指定schema创建数据表
create external table extschema.region(r_regionkey integer,r_name char(25),r_comment varchar(152),r_extra char(1))location('gpfdist://10.110.17.104:8081/region.tbl')format 'text' (delimiter '|' null '' escape 'OFF')encoding 'UTF8';
6. 修改SCHEMA的搜索路径
ALTER DATABASE tpc_h SET search_path TO extschema, public;
退出,重启进入psql客户端
四、创建数据表
执行第一章中的脚本。
五、创建视图
drop view if exists v_order_customer ;create view v_order_customeras select o.o_orderkey, o.o_custkey, c.c_name,o.o_orderdatefrom orders o join customer c on o.o_custkey=c.c_custkeywhere o.o_orderdate>'1997-01-01';
select * from v_order_customer limit 10;select * from v_order_customer where o_orderdate<='1997-01-01';
六、 使用gpfdist加载数据
1. 安装需要的rpm包
yum install -y libevent libyaml apr
2.启动gpfdist文件服务器
gpfdist -d /tmp/data -p 8081 -l ~/log &
3.创建外部表格
--1. regiondrop external table if exists ext_region;create external table ext_region(r_regionkey integer,r_name char(25),r_comment varchar(152),r_extra char(1))location('gpfdist://10.110.17.104:8081/region.tbl')format 'text' (delimiter '|' null '' escape 'OFF')encoding 'UTF8';--2. nationdrop external table if exists ext_nation;create external table ext_nation(n_nationkey integer,n_name char(25),n_regionkey integer,n_comment varchar(152),n_extra char(1))location('gpfdist://10.110.17.104:8081/nation.tbl')format 'text' (delimiter '|' null '' escape 'OFF')encoding 'UTF8';--3.partdrop external table if exists ext_part;create external table ext_part(p_partkey bigint,p_name varchar(55),p_mfgr char(25),p_brand char(10),p_type varchar(25),p_size integer,p_container varchar(10),p_retailprice decimal,p_comment varchar(117),p_extra char(1))location('gpfdist://10.110.17.104:8081/part.tbl')format 'text' (delimiter '|' null '' escape 'OFF')encoding 'UTF8';--4.supplierdrop external table if exists ext_supplier;create external table ext_supplier(s_suppkey bigint,s_name char(25),s_address varchar(40),s_nationkey int,s_phone char(15),s_acctbal decimal,s_comment varchar(101),s_extra char(1))location('gpfdist://10.110.17.104:8081/supplier.tbl')format 'text' (delimiter '|' null '' escape 'OFF')encoding 'UTF8';--5.partsuppdrop external table if exists ext_partsupp;create external table ext_partsupp(ps_partkey bigint,ps_suppkey bigint,ps_availqty integer,ps_supplycost decimal,ps_comment varchar(199),ps_extra char(1))location('gpfdist://10.110.17.104:8081/partsupp.tbl')format 'text' (delimiter '|' null '' escape 'OFF')encoding 'UTF8';#6.customerdrop external table if exists ext_customer;create external table ext_customer(c_custkey bigint,c_name char(25),c_address char(40),c_nationkey integer,c_phone char(15),c_acctbal decimal,c_mktsegment char(10),c_comment varchar(117),c_extra char(1))location('gpfdist://10.110.17.104:8081/customer.tbl')format 'text' (delimiter '|' null '' escape 'OFF')encoding 'UTF8';--7.ordersdrop external table if exists ext_orders;create external table ext_orders(o_orderkey bigint,o_custkey bigint,o_orderstatus char(1),o_totalprice decimal,o_orderdate date,o_orderpriority char(15),o_clerk char(15),o_shippriority integer,o_comment varchar(117),o_extra char(1))location('gpfdist://10.110.17.104:8081/orders.tbl')format 'text' (delimiter '|' null '' escape 'OFF')encoding 'UTF8';--8.lineitemdrop external table if exists ext_lineitem;create external table ext_lineitem(l_orderkey bigint,l_partkey bigint,l_suppkey bigint,l_linenumber integer,l_quantity decimal,l_extendedprice decimal,l_discount decimal,l_tax decimal,l_returnflag char(1),l_linestatus char(1),l_shipdate date,l_commitdate date,l_receiptdate date,l_shipinstruct char(25),l_shipmode char(10),l_comment varchar(117),l_extra char(1))location('gpfdist://10.110.17.104:8081/lineitem.tbl')format 'text' (delimiter '|' null '' escape 'OFF')encoding 'UTF8';
handling errors:
drop external table if exists ext_region1;create external table ext_region1(r_regionkey integer,r_name char(25),r_comment varchar(152),r_extra char(1))location('gpfdist://10.110.17.104:8081/region.tbl')format 'text' (delimiter '|' null '' escape 'OFF')encoding 'UTF8'log errors into errortable segment reject limit 10 rows;
测试是否可以正常读取数据:
select * from ext_region limit 10;select * from ext_nation limit 10;select * from ext_part limit 10;select * from ext_supplier limit 10;select * from ext_partsupp limit 10;select * from ext_customer limit 10;select * from ext_orders limit 10;select * from ext_lineitem limit 10;
4. 加载数据
insert into region select * from ext_region;insert into nation select * from ext_nation;insert into part select * from ext_part;insert into supplier select * from ext_supplier;insert into partsupp select * from ext_partsupp;insert into customer select * from ext_customer;insert into orders select * from ext_orders;insert into lineitem select * from ext_lineitem;
测试是否可以正常读取数据:
select * from region limit 10;select * from nation limit 10;select * from part limit 10;select * from supplier limit 10;select * from partsupp limit 10;select * from customer limit 10;select * from orders limit 10;select * from lineitem limit 10;
七.使用copy加载数据
1. 指定的目录master节点必须可以访问到
copy region from '/tmp/data/region.tbl'with delimiter '|';--log errors into errtable segment reject limit 10 rows;
八.hawq load工具程序加载数据
1. 在gpfdist节点上修改 /home/gpadmin/.bash_profile
在文件末尾增加如下内容:
source /usr/local/hawq/greenplum_path.sh
2.安装依赖
sudo yum install -y libevent libyaml apr
3. 创建表格
create table audit(cmd varchar(10), t timestamp);
4. 启动gpfdist文件服务器
gpfdist -p 8081 -d /tmp/data -l ~/log &
5.新增配置文件load.yaml,内容如下:
---VERSION: 1.0.0.1DATABASE: tpc_hUSER: gpadminHOST: master1PORT: 5432GPLOAD: INPUT: - SOURCE: LOCAL_HOSTNAME: - worker1 PORT: 8081 FILE: - region.tbl - COLUMNS: - r_regionkey: integer - r_name: text - r_comment: text - r_extra: text - FORMAT: text - DELIMITER: '|' - ERROR_LIMIT: 25 - ERROR_TABLE: public.errortable OUTPUT: - TABLE: public.region - MODE: INSERT SQL: - BEFORE: "INSERT INTO audit VALUES('start', current_timestamp)" - AFTER: "INSERT INTO audit VALUES('end', current_timestamp)"
4. 执行hawq load命令
hawq load -f load.yaml
九. PXF插件加载HDFS数据
1.创建外部表
drop external table if exists pxf_hdfs_textsimple;create external table pxf_hdfs_textsimple(location text, month text, num_orders int, total_sales float8) location ('pxf://mycluster/hawq_fs/pxf_data/pxf_hdfs_simple.txt?profile=hdfstextsimple') format 'text' (delimiter=e',');
//namenode单点时:master1.bigdata是指namenode节点,端口是pxf的端口地址
create external table pxf_hdfs_textsimple(location text, month text, num_orders int, total_sales float8) location ('pxf://master1.bigdata:51200/hawq_fs/pxf_data/pxf_hdfs_simple.txt?profile=hdfstextsimple') format 'text' (delimiter=e',');
自定义空值:
drop external table if exists h.pxf_inventory;create external table h.pxf_inventory( inv_date_sk integer, inv_item_sk integer, inv_warehouse_sk integer, inv_quantity_on_hand integer, inv_null varchar(10))location ('pxf://master1.bigdata:51200/tpcdsdata/30T/inventory/data-m-*?profile=hdfstextsimple')format 'TEXT' (DELIMITER '|' NULL '');
2. 创建pxf_hdfs_simple.txt文件,内容:
echo 'Prague,Jan,101,4875.33Rome,Mar,87,1557.39Bangalore,May,317,8936.99Beijing,Jul,411,11600.67' > /tmp/pxf_hdfs_simple.txt
3.文件上传至hdfs
hdfs dfs -put /tmp/pxf_hdfs_simple.txt /hawq_fs/pxf_data/hdfs dfs -cat /hawq_fs/pxf_data/pxf_hdfs_simple.txt
select * from pxf_hdfs_textsimple;
十. PXF插件加载HIVE数据
1.创建hawq数据表
drop table if exists salesinfo;create table salesinfo(location text, month text, num_orders int, total_sales float8)with(appendonly=true,orientation=parquet,compresstype=snappy)distributed randomly;
1. hive中创建表sales_info
/usr/hdp/current/hive-client/bin./hive
drop table if exists sales_info;create table sales_info (location string, month string, number_of_orders int, total_sales double) row format delimited fields terminated by ',' stored as textfile;
Prague,Jan,101,4875.33Rome,Mar,87,1557.39Bangalore,May,317,8936.99Beijing,Jul,411,11600.67San Francisco,Sept,156,6846.34Paris,Nov,159,7134.56San Francisco,Jan,113,5397.89Prague,Dec,333,9894.77Bangalore,Jul,271,8320.55Beijing,Dec,100,4248.41
2. load 数据到hive数据表中
load data local inpath '/tmp/pxf_hive_datafile.txt' into table sales_info;
方法一:创建外部表
psql
1.创建外部表方式
drop table if exists salesinfo_hiveprofile;create external table salesinfo_hiveprofile(location text, month text, num_orders int, total_sales float8) location ('pxf://mycluster/default.sales_info?profile=hive') format 'custom' (formatter='pxfwritable_import');
2. 导入数据
insert into salesinfo select * from salesinfo_hiveprofile;
方法二:读取hive Hcatalog 元数据服务
这种方式需要修改pxf-profiles.xml中关于Hive的内容,增加outputformat对象
HiveText:
org.apache.hawq.pxf.service.io.Text
HiveORC:
org.apache.hawq.pxf.service.io.GPDBWritable
HiveRC:
org.apache.hawq.pxf.service.io.Text
Hive
org.apache.hawq.pxf.service.io.GPDBWritable
格式:
SELECT * FROM hcatalog.hive-db-name.hive-table-name;
如:
SELECT * FROM hcatalog.default.sales_info;
导入数据:
insert into salesinfo SELECT * FROM hcatalog.default.sales_info;
十一. PXF插件加载HBASE数据
--方式1,直接引用列族和Qualifier
1. 创建外部表
drop EXTERNAL table if exists hbase_sales;create external table hbase_sales(recordkey bytea,"cf1:saleid" varchar,"cf8:comments" varchar)location ('pxf://mycluster/sales?profile=hbase')format 'custom'(formatter='pxfwritable_import')encoding 'utf8';
2. 使用HBase Shell创建HBase数据表,并写入数据
cd /usr/hdp/current/hbase-client/bin./hbase shell
创建表格
create 'sales', {NAME=>'cf1',VERSION=>2},{NAME=>'cf8',VERSION=>2}#create 'pxf_hbase_region', {NAME=>'cf1',VERSION=>2}
写入数据
put 'sales','rk001','cf1:saleid', 's001'put 'sales','rk001','cf8:comments', 'comments1'
select * from hbase_sales;
--方式2,在HBase中创建pxflookup表
create 'pxflookup', {NAME=>'mapping',VERSION=>2}put 'pxflookup', 'sales', 'mapping:id', 'cf1:saleid'put 'pxflookup', 'sales', 'mapping:cmts', 'cf8:comments'drop EXTERNAL table if exists pxf_hbase_sales ;CREATE EXTERNAL TABLE pxf_hbase_sales ( recordkey bytea, id varchar, cmts varchar)location ('pxf://mycluster/sales?profile=hbase')format 'custom'(formatter='pxfwritable_import')encoding 'utf8';select * from pxf_hbase_sales;
十二. PXF访问Jdbc(mysql)
grant all privileges on *.* to "pxf"@"%" identified by 'test'; mysql> use test; mysql> create table myclass( id int(4) not null primary key, name varchar(20) not null, gender int(4) not null default '0', degree double(16,2));
insert into myclass values(1,"tom",1,90);insert into myclass values(2,'john',0,94);insert into myclass values(3,'simon',1,79);
/etc/pxf/conf/pxf-public.classpath添加/usr/lib/pxf/mysql-connector-java-*.jar
psql:
gpadmin=#drop external table if exists jdbc_myclass; CREATE EXTERNAL TABLE jdbc_myclass(id integer, name text, gender integer, degree float8) LOCATION ('pxf://localhost:51200/test.myclass' '?PROFILE=JDBC' '&JDBC_DRIVER=com.mysql.jdbc.Driver' '&DB_URL=jdbc:mysql://10.110.22.191:3306/test&USER=pxf&PASS=test' ) FORMAT 'CUSTOM' (Formatter='pxfwritable_import');select * from jdbc_myclass;
slaes_info:
create table sales_info (location varchar(200), month varchar(10), number_of_orders int, total_sales double);insert into sales_info values("Prague","Jan",101,4875.33),("Rome","Mar",87,1557.39),("Bangalore","May",317,8936.99),("Beijing","Jul",411,11600.67),("San Francisco","Sept",156,6846.34),("Paris","Nov",159,7134.56),("San Francisco","Jan",113,5397.89),("Prague","Dec",333,9894.77),("Bangalore","Jul",271,8320.55),("Beijing","Dec",100,4248.41);
psql:
drop external table if exists jdbc_sales_info ; CREATE EXTERNAL TABLE jdbc_sales_info (location varchar(200), month varchar(10), number_of_orders integer, total_sales float8) LOCATION ('pxf://localhost:51200/test.sales_info' '?PROFILE=JDBC' '&JDBC_DRIVER=com.mysql.jdbc.Driver' '&DB_URL=jdbc:mysql://10.110.22.191:3306/test&USER=pxf&PASS=test' ) FORMAT 'CUSTOM' (Formatter='pxfwritable_import');CREATE EXTERNAL TABLE sales(id integer, cdate date, amt float8, grade text) LOCATION ('pxf://localhost:51200/sales' '?PROFILE=JDBC' '&JDBC_DRIVER=com.mysql.jdbc.Driver' '&DB_URL=jdbc:mysql://192.168.200.6:3306/demodb&USER=root&PASS=root' '&PARTITION_BY=cdate:date&RANGE=2008-01-01:2010-01-01&INTERVAL=1:year' ) FORMAT 'CUSTOM' (Formatter='pxfwritable_import');
十三. gpfdist卸载数据
方法一: gpfdist 定义基于文件的可写外部表
1. 定义可写外部表
create writable external table unload_region(like region)location ('gpfdist://10.110.17.104:8081/exp_region.tbl')format 'text' (delimiter ',');
--只可写,不可读
2. 导入数据
insert into unload_region select * from region;
- 查看导出的数据文件exp_region.tbl
cat exp_region.tbl
方法二 使用copy卸载数据
copy region
to '/home/gpadmin/region.out';
copy (select * from region where r_regionkey=1) to '/home/gpadmin/region.out';
十四. PXF插件写数据到HDFS
1.创建可写外部表
create writable external table pxf_hdfs_writabletbl_1(location text, month text, num_orders int, total_sales float8) location ('pxf://mycluster/hawq_fs/pxf_data/pxfwritable_hdfs_textsimple1?profile=hdfstextsimple') format 'text' (delimiter=e',');
2.写入数据
insert into pxf_hdfs_writabletbl_1 values ( 'frankfurt', 'mar', 777, 3956.98 );insert into pxf_hdfs_writabletbl_1 values ( 'cleveland', 'oct', 3812, 96645.37 );
3.只可写不可读
数据
文件
空间
数据表
内容
插件
数据库
方式
方法
节点
表格
目录
服务
测试
地址
对象
工具
服务器
端口
语句
数据库的安全要保护哪些东西
数据库安全各自的含义是什么
生产安全数据库录入
数据库的安全性及管理
数据库安全策略包含哪些
海淀数据库安全审计系统
建立农村房屋安全信息数据库
易用的数据库客户端支持安全管理
连接数据库失败ssl安全错误
数据库的锁怎样保障安全
计算机网络安全有关书籍
excel是 软件开发
gsx数据库
计算机网络技术与原理
如何同时写两个数据库
网络安全启动仪式新闻
户籍代码软件开发
江苏个人软件开发价钱
小米显示服务器繁忙
热血江湖怎么打开服务器
图书管理系统 数据库
网络安全比赛真的有韩商言吗
广东服务器机柜哪家靠谱
宁波宁海区服务器检测
海生网络系统软件开发
问道手游梦幻服务器怎么打
别名服务器
高级数据库技术实习目的
服务器启动后程序不运行
软件开发装修
阿里云企业服务器如何连接
mysql语句导入数据库
维护网络安全有利于社会进步
湖北服务管理软件开发
企业级数据库的优缺点
高青资产管理软件开发公司
我的世界服务器不能开小号怎么办
网络安全威胁情报分类
阳江导航软件开发
数据库选修了课程的学生