Apache Hawq功能测试脚本
发表于:2024-09-22 作者:千家信息网编辑
千家信息网最后更新 2024年09月22日,一. TPC-H编译1.下载工具TPC-H下载地址2. 拷贝生成makefile,并修改makefile/opt/tpc-h-v2.17.0/dbgencp makefile.suite makefi
千家信息网最后更新 2024年09月22日Apache Hawq功能测试脚本
一. TPC-H编译
1.下载工具TPC-H
下载地址
2. 拷贝生成makefile,并修改makefile
/opt/tpc-h-v2.17.0/dbgencp makefile.suite makefile
修改makefile
3.编译
make -f makefile
4.生成数据
dbgen -v -U 1 -s 1
./dbgen
5.创建数据库以及相关表格语句
--create database tpch;\c tpch;--1. regiondrop table if exists region;create table region(r_regionkey integer,r_name char(25),r_comment varchar(152),r_extra char(1))with(appendonly=true,orientation=parquet,compresstype=snappy)distributed by(r_regionkey);--2. nationdrop table if exists nation;create table nation(n_nationkey integer,n_name char(25),n_regionkey integer,n_comment varchar(152),n_extra char(1))with(appendonly=true,orientation=parquet,compresstype=snappy)distributed by(n_nationkey);--3.partdrop table if exists part;create table part(p_partkey bigint,p_name varchar(55),p_mfgr char(25),p_brand char(10),p_type varchar(25),p_size integer,p_container varchar(10),p_retailprice decimal,p_comment varchar(117),p_extra char(1))with(appendonly=true,orientation=parquet,compresstype=snappy)distributed by (p_partkey);--4.supplierdrop table if exists supplier;create table supplier(s_suppkey bigint,s_name char(25),s_address varchar(40),s_nationkey int,s_phone char(15),s_acctbal decimal,s_comment varchar(101),r_extra char(1))with(appendonly=true,orientation=parquet,compresstype=snappy)distributed by (s_suppkey);--5.partsuppdrop table if exists partsupp;create table partsupp(ps_partkey bigint,ps_suppkey bigint,ps_availqty integer,ps_supplycost decimal,ps_comment varchar(199),ps_extra char(1))with(appendonly=true,orientation=parquet,compresstype=snappy)distributed by(ps_partkey);#6.customerdrop table if exists customer;create table customer(c_custkey bigint,c_name char(25),c_address char(40),c_nationkey integer,c_phone char(15),c_acctbal decimal,c_mktsegment char(10),c_comment varchar(117),c_extra char(1))with(appendonly=true,orientation=parquet,compresstype=snappy)distributed by (c_custkey);--7.ordersdrop table if exists orders;create table orders(o_orderkey bigint,o_custkey bigint,o_orderstatus char(1),o_totalprice decimal,o_orderdate date,o_orderpriority char(15),o_clerk char(15),o_shippriority integer,o_comment varchar(117),o_extra char(1))with(appendonly=true,orientation=parquet,compresstype=snappy)distributed by (o_orderkey );--8.lineitemdrop table if exists lineitem;create table lineitem(l_orderkey bigint,l_partkey bigint,l_suppkey bigint,l_linenumber integer,l_quantity decimal,l_extendedprice decimal,l_discount decimal,l_tax decimal,l_returnflag char(1),l_linestatus char(1),l_shipdate date,l_commitdate date,l_receiptdate date,l_shipinstruct char(25),l_shipmode char(10),l_comment varchar(117),l_extra char(1))with(appendonly=true,orientation=parquet,compresstype=snappy)distributed by (l_linenumber);
6.查询语句在queries目录下 22个
二、创建表空间
1. 创建文件空间配置文件,在master节点上执行
$hawq filespace -o tpc_h_config
文件内容如下:
filespace:fs_tpc_hfsreplica:3dfs_url::mycluster/hawq_fs/fs_tpc_h
2. 创建HDFS目录
$hdfs dfs -mkdir /hawq_fs$hdfs dfs -chown gpadmin:gpadmin /hawq_fs$hdfs dfs -ls /
3.创建文件空间
$hawq filespace -c tpc_h_config
4.创建表空间,psql
create tablespace ts_tpc_h filespace fs_tpc_h;
5.删除文件空间和表空间
表空间拥有者可以删除,且不能有其他数据对象(如数据库,数据表)使用当前表空间,不能有表空间使用当前文件空间,才可以删除。
6. 查看当前所有表空间
SELECT spcname AS tblspc, fsname AS filespc, fsedbid AS seg_dbid, fselocation AS datadir FROM pg_tablespace pgts, pg_filespace pgfs, pg_filespace_entry pgfse WHERE pgts.spcfsoid=pgfse.fsefsoid AND pgfse.fsefsoid=pgfs.oid ORDER BY tblspc, seg_dbid;
三、创建数据库和Schema
1. 创建数据库
create database tpc_h with template template0 tablespace ts_tpc_h;
\c tpc_h;
2.创建schema
create schema extschema;
3. 查看当前schema,函数current_schema()
select current_schema();
4. 查看当前Schema的搜索路径
show search_path;
5. 指定schema创建数据表
create external table extschema.region(r_regionkey integer,r_name char(25),r_comment varchar(152),r_extra char(1))location('gpfdist://10.110.17.104:8081/region.tbl')format 'text' (delimiter '|' null '' escape 'OFF')encoding 'UTF8';
6. 修改SCHEMA的搜索路径
ALTER DATABASE tpc_h SET search_path TO extschema, public;
退出,重启进入psql客户端
四、创建数据表
执行第一章中的脚本。
五、创建视图
drop view if exists v_order_customer ;create view v_order_customeras select o.o_orderkey, o.o_custkey, c.c_name,o.o_orderdatefrom orders o join customer c on o.o_custkey=c.c_custkeywhere o.o_orderdate>'1997-01-01';
select * from v_order_customer limit 10;select * from v_order_customer where o_orderdate<='1997-01-01';
六、 使用gpfdist加载数据
1. 安装需要的rpm包
yum install -y libevent libyaml apr
2.启动gpfdist文件服务器
gpfdist -d /tmp/data -p 8081 -l ~/log &
3.创建外部表格
--1. regiondrop external table if exists ext_region;create external table ext_region(r_regionkey integer,r_name char(25),r_comment varchar(152),r_extra char(1))location('gpfdist://10.110.17.104:8081/region.tbl')format 'text' (delimiter '|' null '' escape 'OFF')encoding 'UTF8';--2. nationdrop external table if exists ext_nation;create external table ext_nation(n_nationkey integer,n_name char(25),n_regionkey integer,n_comment varchar(152),n_extra char(1))location('gpfdist://10.110.17.104:8081/nation.tbl')format 'text' (delimiter '|' null '' escape 'OFF')encoding 'UTF8';--3.partdrop external table if exists ext_part;create external table ext_part(p_partkey bigint,p_name varchar(55),p_mfgr char(25),p_brand char(10),p_type varchar(25),p_size integer,p_container varchar(10),p_retailprice decimal,p_comment varchar(117),p_extra char(1))location('gpfdist://10.110.17.104:8081/part.tbl')format 'text' (delimiter '|' null '' escape 'OFF')encoding 'UTF8';--4.supplierdrop external table if exists ext_supplier;create external table ext_supplier(s_suppkey bigint,s_name char(25),s_address varchar(40),s_nationkey int,s_phone char(15),s_acctbal decimal,s_comment varchar(101),s_extra char(1))location('gpfdist://10.110.17.104:8081/supplier.tbl')format 'text' (delimiter '|' null '' escape 'OFF')encoding 'UTF8';--5.partsuppdrop external table if exists ext_partsupp;create external table ext_partsupp(ps_partkey bigint,ps_suppkey bigint,ps_availqty integer,ps_supplycost decimal,ps_comment varchar(199),ps_extra char(1))location('gpfdist://10.110.17.104:8081/partsupp.tbl')format 'text' (delimiter '|' null '' escape 'OFF')encoding 'UTF8';#6.customerdrop external table if exists ext_customer;create external table ext_customer(c_custkey bigint,c_name char(25),c_address char(40),c_nationkey integer,c_phone char(15),c_acctbal decimal,c_mktsegment char(10),c_comment varchar(117),c_extra char(1))location('gpfdist://10.110.17.104:8081/customer.tbl')format 'text' (delimiter '|' null '' escape 'OFF')encoding 'UTF8';--7.ordersdrop external table if exists ext_orders;create external table ext_orders(o_orderkey bigint,o_custkey bigint,o_orderstatus char(1),o_totalprice decimal,o_orderdate date,o_orderpriority char(15),o_clerk char(15),o_shippriority integer,o_comment varchar(117),o_extra char(1))location('gpfdist://10.110.17.104:8081/orders.tbl')format 'text' (delimiter '|' null '' escape 'OFF')encoding 'UTF8';--8.lineitemdrop external table if exists ext_lineitem;create external table ext_lineitem(l_orderkey bigint,l_partkey bigint,l_suppkey bigint,l_linenumber integer,l_quantity decimal,l_extendedprice decimal,l_discount decimal,l_tax decimal,l_returnflag char(1),l_linestatus char(1),l_shipdate date,l_commitdate date,l_receiptdate date,l_shipinstruct char(25),l_shipmode char(10),l_comment varchar(117),l_extra char(1))location('gpfdist://10.110.17.104:8081/lineitem.tbl')format 'text' (delimiter '|' null '' escape 'OFF')encoding 'UTF8';
handling errors:
drop external table if exists ext_region1;create external table ext_region1(r_regionkey integer,r_name char(25),r_comment varchar(152),r_extra char(1))location('gpfdist://10.110.17.104:8081/region.tbl')format 'text' (delimiter '|' null '' escape 'OFF')encoding 'UTF8'log errors into errortable segment reject limit 10 rows;
测试是否可以正常读取数据:
select * from ext_region limit 10;select * from ext_nation limit 10;select * from ext_part limit 10;select * from ext_supplier limit 10;select * from ext_partsupp limit 10;select * from ext_customer limit 10;select * from ext_orders limit 10;select * from ext_lineitem limit 10;
4. 加载数据
insert into region select * from ext_region;insert into nation select * from ext_nation;insert into part select * from ext_part;insert into supplier select * from ext_supplier;insert into partsupp select * from ext_partsupp;insert into customer select * from ext_customer;insert into orders select * from ext_orders;insert into lineitem select * from ext_lineitem;
测试是否可以正常读取数据:
select * from region limit 10;select * from nation limit 10;select * from part limit 10;select * from supplier limit 10;select * from partsupp limit 10;select * from customer limit 10;select * from orders limit 10;select * from lineitem limit 10;
七.使用copy加载数据
1. 指定的目录master节点必须可以访问到
copy region from '/tmp/data/region.tbl'with delimiter '|';--log errors into errtable segment reject limit 10 rows;
八.hawq load工具程序加载数据
1. 在gpfdist节点上修改 /home/gpadmin/.bash_profile
在文件末尾增加如下内容:
source /usr/local/hawq/greenplum_path.sh
2.安装依赖
sudo yum install -y libevent libyaml apr
3. 创建表格
create table audit(cmd varchar(10), t timestamp);
4. 启动gpfdist文件服务器
gpfdist -p 8081 -d /tmp/data -l ~/log &
5.新增配置文件load.yaml,内容如下:
---VERSION: 1.0.0.1DATABASE: tpc_hUSER: gpadminHOST: master1PORT: 5432GPLOAD: INPUT: - SOURCE: LOCAL_HOSTNAME: - worker1 PORT: 8081 FILE: - region.tbl - COLUMNS: - r_regionkey: integer - r_name: text - r_comment: text - r_extra: text - FORMAT: text - DELIMITER: '|' - ERROR_LIMIT: 25 - ERROR_TABLE: public.errortable OUTPUT: - TABLE: public.region - MODE: INSERT SQL: - BEFORE: "INSERT INTO audit VALUES('start', current_timestamp)" - AFTER: "INSERT INTO audit VALUES('end', current_timestamp)"
4. 执行hawq load命令
hawq load -f load.yaml
九. PXF插件加载HDFS数据
1.创建外部表
drop external table if exists pxf_hdfs_textsimple;create external table pxf_hdfs_textsimple(location text, month text, num_orders int, total_sales float8) location ('pxf://mycluster/hawq_fs/pxf_data/pxf_hdfs_simple.txt?profile=hdfstextsimple') format 'text' (delimiter=e',');
//namenode单点时:master1.bigdata是指namenode节点,端口是pxf的端口地址
create external table pxf_hdfs_textsimple(location text, month text, num_orders int, total_sales float8) location ('pxf://master1.bigdata:51200/hawq_fs/pxf_data/pxf_hdfs_simple.txt?profile=hdfstextsimple') format 'text' (delimiter=e',');
自定义空值:
drop external table if exists h.pxf_inventory;create external table h.pxf_inventory( inv_date_sk integer, inv_item_sk integer, inv_warehouse_sk integer, inv_quantity_on_hand integer, inv_null varchar(10))location ('pxf://master1.bigdata:51200/tpcdsdata/30T/inventory/data-m-*?profile=hdfstextsimple')format 'TEXT' (DELIMITER '|' NULL '');
2. 创建pxf_hdfs_simple.txt文件,内容:
echo 'Prague,Jan,101,4875.33Rome,Mar,87,1557.39Bangalore,May,317,8936.99Beijing,Jul,411,11600.67' > /tmp/pxf_hdfs_simple.txt
3.文件上传至hdfs
hdfs dfs -put /tmp/pxf_hdfs_simple.txt /hawq_fs/pxf_data/hdfs dfs -cat /hawq_fs/pxf_data/pxf_hdfs_simple.txt
select * from pxf_hdfs_textsimple;
十. PXF插件加载HIVE数据
1.创建hawq数据表
drop table if exists salesinfo;create table salesinfo(location text, month text, num_orders int, total_sales float8)with(appendonly=true,orientation=parquet,compresstype=snappy)distributed randomly;
1. hive中创建表sales_info
/usr/hdp/current/hive-client/bin./hive
drop table if exists sales_info;create table sales_info (location string, month string, number_of_orders int, total_sales double) row format delimited fields terminated by ',' stored as textfile;
Prague,Jan,101,4875.33Rome,Mar,87,1557.39Bangalore,May,317,8936.99Beijing,Jul,411,11600.67San Francisco,Sept,156,6846.34Paris,Nov,159,7134.56San Francisco,Jan,113,5397.89Prague,Dec,333,9894.77Bangalore,Jul,271,8320.55Beijing,Dec,100,4248.41
2. load 数据到hive数据表中
load data local inpath '/tmp/pxf_hive_datafile.txt' into table sales_info;
方法一:创建外部表
psql
1.创建外部表方式
drop table if exists salesinfo_hiveprofile;create external table salesinfo_hiveprofile(location text, month text, num_orders int, total_sales float8) location ('pxf://mycluster/default.sales_info?profile=hive') format 'custom' (formatter='pxfwritable_import');
2. 导入数据
insert into salesinfo select * from salesinfo_hiveprofile;
方法二:读取hive Hcatalog 元数据服务
这种方式需要修改pxf-profiles.xml中关于Hive的内容,增加outputformat对象
HiveText:
org.apache.hawq.pxf.service.io.Text
HiveORC:
org.apache.hawq.pxf.service.io.GPDBWritable
HiveRC:
org.apache.hawq.pxf.service.io.Text
Hive
org.apache.hawq.pxf.service.io.GPDBWritable
格式:
SELECT * FROM hcatalog.hive-db-name.hive-table-name;
如:
SELECT * FROM hcatalog.default.sales_info;
导入数据:
insert into salesinfo SELECT * FROM hcatalog.default.sales_info;
十一. PXF插件加载HBASE数据
--方式1,直接引用列族和Qualifier
1. 创建外部表
drop EXTERNAL table if exists hbase_sales;create external table hbase_sales(recordkey bytea,"cf1:saleid" varchar,"cf8:comments" varchar)location ('pxf://mycluster/sales?profile=hbase')format 'custom'(formatter='pxfwritable_import')encoding 'utf8';
2. 使用HBase Shell创建HBase数据表,并写入数据
cd /usr/hdp/current/hbase-client/bin./hbase shell
创建表格
create 'sales', {NAME=>'cf1',VERSION=>2},{NAME=>'cf8',VERSION=>2}#create 'pxf_hbase_region', {NAME=>'cf1',VERSION=>2}
写入数据
put 'sales','rk001','cf1:saleid', 's001'put 'sales','rk001','cf8:comments', 'comments1'
select * from hbase_sales;
--方式2,在HBase中创建pxflookup表
create 'pxflookup', {NAME=>'mapping',VERSION=>2}put 'pxflookup', 'sales', 'mapping:id', 'cf1:saleid'put 'pxflookup', 'sales', 'mapping:cmts', 'cf8:comments'drop EXTERNAL table if exists pxf_hbase_sales ;CREATE EXTERNAL TABLE pxf_hbase_sales ( recordkey bytea, id varchar, cmts varchar)location ('pxf://mycluster/sales?profile=hbase')format 'custom'(formatter='pxfwritable_import')encoding 'utf8';select * from pxf_hbase_sales;
十二. PXF访问Jdbc(mysql)
grant all privileges on *.* to "pxf"@"%" identified by 'test'; mysql> use test; mysql> create table myclass( id int(4) not null primary key, name varchar(20) not null, gender int(4) not null default '0', degree double(16,2));
insert into myclass values(1,"tom",1,90);insert into myclass values(2,'john',0,94);insert into myclass values(3,'simon',1,79);
/etc/pxf/conf/pxf-public.classpath添加/usr/lib/pxf/mysql-connector-java-*.jar
psql:
gpadmin=#drop external table if exists jdbc_myclass; CREATE EXTERNAL TABLE jdbc_myclass(id integer, name text, gender integer, degree float8) LOCATION ('pxf://localhost:51200/test.myclass' '?PROFILE=JDBC' '&JDBC_DRIVER=com.mysql.jdbc.Driver' '&DB_URL=jdbc:mysql://10.110.22.191:3306/test&USER=pxf&PASS=test' ) FORMAT 'CUSTOM' (Formatter='pxfwritable_import');select * from jdbc_myclass;
slaes_info:
create table sales_info (location varchar(200), month varchar(10), number_of_orders int, total_sales double);insert into sales_info values("Prague","Jan",101,4875.33),("Rome","Mar",87,1557.39),("Bangalore","May",317,8936.99),("Beijing","Jul",411,11600.67),("San Francisco","Sept",156,6846.34),("Paris","Nov",159,7134.56),("San Francisco","Jan",113,5397.89),("Prague","Dec",333,9894.77),("Bangalore","Jul",271,8320.55),("Beijing","Dec",100,4248.41);
psql:
drop external table if exists jdbc_sales_info ; CREATE EXTERNAL TABLE jdbc_sales_info (location varchar(200), month varchar(10), number_of_orders integer, total_sales float8) LOCATION ('pxf://localhost:51200/test.sales_info' '?PROFILE=JDBC' '&JDBC_DRIVER=com.mysql.jdbc.Driver' '&DB_URL=jdbc:mysql://10.110.22.191:3306/test&USER=pxf&PASS=test' ) FORMAT 'CUSTOM' (Formatter='pxfwritable_import');CREATE EXTERNAL TABLE sales(id integer, cdate date, amt float8, grade text) LOCATION ('pxf://localhost:51200/sales' '?PROFILE=JDBC' '&JDBC_DRIVER=com.mysql.jdbc.Driver' '&DB_URL=jdbc:mysql://192.168.200.6:3306/demodb&USER=root&PASS=root' '&PARTITION_BY=cdate:date&RANGE=2008-01-01:2010-01-01&INTERVAL=1:year' ) FORMAT 'CUSTOM' (Formatter='pxfwritable_import');
十三. gpfdist卸载数据
方法一: gpfdist 定义基于文件的可写外部表
1. 定义可写外部表
create writable external table unload_region(like region)location ('gpfdist://10.110.17.104:8081/exp_region.tbl')format 'text' (delimiter ',');
--只可写,不可读
2. 导入数据
insert into unload_region select * from region;
- 查看导出的数据文件exp_region.tbl
cat exp_region.tbl
方法二 使用copy卸载数据
copy region
to '/home/gpadmin/region.out';
copy (select * from region where r_regionkey=1) to '/home/gpadmin/region.out';
十四. PXF插件写数据到HDFS
1.创建可写外部表
create writable external table pxf_hdfs_writabletbl_1(location text, month text, num_orders int, total_sales float8) location ('pxf://mycluster/hawq_fs/pxf_data/pxfwritable_hdfs_textsimple1?profile=hdfstextsimple') format 'text' (delimiter=e',');
2.写入数据
insert into pxf_hdfs_writabletbl_1 values ( 'frankfurt', 'mar', 777, 3956.98 );insert into pxf_hdfs_writabletbl_1 values ( 'cleveland', 'oct', 3812, 96645.37 );
3.只可写不可读
数据
文件
空间
数据表
内容
插件
数据库
方式
方法
节点
表格
目录
服务
测试
地址
对象
工具
服务器
端口
语句
数据库的安全要保护哪些东西
数据库安全各自的含义是什么
生产安全数据库录入
数据库的安全性及管理
数据库安全策略包含哪些
海淀数据库安全审计系统
建立农村房屋安全信息数据库
易用的数据库客户端支持安全管理
连接数据库失败ssl安全错误
数据库的锁怎样保障安全
数据库对网站有什么用
数据库技术与应用是一门
数据挖掘物联网网络安全
网络技术作业代做公众号
迅雷 推荐静态服务器
检测站网络安全应急预案
鼎盛创美互联网科技
计算机实用网络技术论文
江西互联网养老软件开发
销售管理数据库设计er图
软件开发模型 之迭代
数据库中20c代表的什么意思
网站查询服务器商
数据库系统原理专升本试题
小火箭服务器订阅加速软件
网络安全博览会场地如何收费
车载网络技术3000字
Base服务器解释
益阳象佳软件开发有限公司
iis自动开启管理服务器
搜佛说互联网科技工作室
客户电话采集软件开发
极光推送服务器的搭建
网络安全目前存在漏洞
徐州网络安全认证流程有哪些
服务器的部署环境是什么
数据库登录权限怎么设置
dell服务器操作系统
服务器安全防护方案
服务器 管理 终端密码