千家信息网

flink 多表join的例子

发表于:2024-11-14 作者:千家信息网编辑
千家信息网最后更新 2024年11月14日,今天写了一个稍微复杂的例子, 实现了类似mysql group_concat 功能,记录一下MapToString 参考bug 那篇博客public static void main(String[]
千家信息网最后更新 2024年11月14日flink 多表join的例子

今天写了一个稍微复杂的例子, 实现了类似mysql group_concat 功能,记录一下
MapToString 参考bug 那篇博客

public static void main(String[] arg) throws Exception {        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();        BatchTableEnvironment tableEnv = new BatchTableEnvironment(env, TableConfig.DEFAULT());        tableEnv.registerFunction("mapToString", new MapToString());        getProjectInfo(env,tableEnv);        getProject(env,tableEnv);        joinTableProjectWithInfo(tableEnv);        Table query = tableEnv.sqlQuery("select id, name, type from result_agg");        DataSet ds=  tableEnv.toDataSet(query, Row.class);        ds.print();        ds.writeAsText("/home/test", WriteMode.OVERWRITE);        env.execute("multiple-table");              }    public static void getProjectInfo(ExecutionEnvironment env,BatchTableEnvironment tableEnv) {        TypeInformation[] fieldTypes = new TypeInformation[] { BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO };        String[] fieldNames = new String[] { "id",  "type" };        RowTypeInfo rowTypeInfo = new RowTypeInfo(fieldTypes, fieldNames);        JDBCInputFormat jdbcInputFormat = JDBCInputFormat.buildJDBCInputFormat().setDrivername("com.mysql.jdbc.Driver")                .setDBUrl("jdbc:mysql://ip:3306/space?characterEncoding=utf8")                .setUsername("user").setPassword("pwd")                .setQuery("select project_fid, cast(project_info_type as CHAR) as type from project").setRowTypeInfo(rowTypeInfo).finish();        DataSource s = env.createInput(jdbcInputFormat);           tableEnv.registerDataSet("project_info", s);        aggProjectInfo(tableEnv,"project_info");    }    public static void aggProjectInfo(BatchTableEnvironment tableEnv, String tableName) {           Table tapiResult = tableEnv.scan(tableName);        tapiResult.printSchema();                       Table query = tableEnv.sqlQuery("select id, mapToString(collect(type)) as type from project_info group by id");        tableEnv.registerTable(tableName+"_agg", query);                tapiResult = tableEnv.scan(tableName+"_agg");        tapiResult.printSchema();       }    public static void getProject(ExecutionEnvironment env,BatchTableEnvironment tableEnv) {        TypeInformation[] fieldTypes = new TypeInformation[] { BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO };        String[] fieldNames = new String[] { "pid",  "name" };        RowTypeInfo rowTypeInfo = new RowTypeInfo(fieldTypes, fieldNames);        JDBCInputFormat jdbcInputFormat = JDBCInputFormat.buildJDBCInputFormat().setDrivername("com.mysql.jdbc.Driver")                .setDBUrl("jdbc:mysql://ip:3306/space?characterEncoding=utf8")                .setUsername("user").setPassword("pwd")                .setQuery("select fid, project_name  from t_project").setRowTypeInfo(rowTypeInfo).finish();        DataSource s = env.createInput(jdbcInputFormat);        tableEnv.registerDataSet("project", s);    }    public static void joinTableProjectWithInfo(BatchTableEnvironment tableEnv) {        Table result =tableEnv.sqlQuery("select a.pid as id , a.name , b.type  from project a inner join  project_info_agg  b on a.pid=b.id");        tableEnv.registerTable("result_agg", result);        result.printSchema();    }
0