千家信息网

如何生成Java数据脚本

发表于:2025-02-03 作者:千家信息网编辑
千家信息网最后更新 2025年02月03日,这篇文章主要介绍"如何生成Java数据脚本",在日常操作中,相信很多人在如何生成Java数据脚本问题上存在疑惑,小编查阅了各式资料,整理出简单好用的操作方法,希望对大家解答"如何生成Java数据脚本"的疑惑有所帮助!
千家信息网最后更新 2025年02月03日如何生成Java数据脚本

这篇文章主要介绍"如何生成Java数据脚本",在日常操作中,相信很多人在如何生成Java数据脚本问题上存在疑惑,小编查阅了各式资料,整理出简单好用的操作方法,希望对大家解答"如何生成Java数据脚本"的疑惑有所帮助!接下来,请跟着小编一起来学习吧!

/**
 * Generates mock website access-log data and writes it to a file.
 *
 * Every record is a single tab-separated line with the fields, in order:
 * ip, address, date, timestamp, userid, website, action.
 */
object ProducePvAndUvData {
  // Exclusive upper bound of each octet of a simulated IP address
  val IP = 223
  // Candidate regions (the second "山西" of the original list was a typo for "陕西")
  val ADDRESS = Array("北京", "天津", "上海", "重庆", "河北", "辽宁", "山西",
                      "吉林", "江苏", "浙江", "黑龙江", "安徽", "福建", "江西",
                      "山东", "河南", "湖北", "湖南", "广东", "海南", "四川",
                      "贵州", "云南", "陕西", "甘肃", "青海", "台湾", "内蒙",
                      "广西", "西藏", "宁夏", "新疆", "香港", "澳门")
  // Date string (yyyy-MM-dd) computed once, when this object is initialised
  val DATE = new SimpleDateFormat("yyyy-MM-dd").format(new Date())
  // Timestamp placeholder; not read anywhere in this object
  val TIMESTAMP = 0L
  // User-id placeholder; not read anywhere in this object
  val USERID = 0L
  // Candidate websites
  val WEBSITE = Array("www.baidu.com", "www.taobao.com", "www.dangdang.com", "www.jd.com", "www.suning.com", "www.mi.com", "www.gome.com.cn")
  // Candidate user actions
  val ACTION = Array("Regist", "Comment", "View", "Login", "Buy", "Click", "Logout")

  /**
   * Creates the output file and fills it with mock records for 50000
   * simulated visitors (1, 3 or 5 records each). Every record is also
   * echoed to standard output.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val pathFileName = "G://idea//scala//spark02/data"
    // Open the output stream only once the file has really been (re)created,
    // so nothing is left open when creation fails.
    if (CreateFile(pathFileName)) {
      val pw = new PrintWriter(
        new OutputStreamWriter(new FileOutputStream(new File(pathFileName), true), "UTF-8"))
      try {
        // One generator for the whole run instead of one per iteration
        val random = new Random()
        var i = 0
        while (i < 50000) {
          // Simulated IP address
          val ip = random.nextInt(IP) + "." + random.nextInt(IP) + "." + random.nextInt(IP) + "." + random.nextInt(IP)
          // Simulated region; bounded by the array length rather than a magic number
          val address = ADDRESS(random.nextInt(ADDRESS.length))
          val date = DATE
          // Simulated user id
          val userid = Math.abs(random.nextLong())
          // The inner loop models one user hitting several sites at different
          // moments; `| 1` forces the count to be odd, i.e. 1, 3 or 5.
          val flag = random.nextInt(5) | 1
          var j = 0
          while (j < flag) {
            val timestamp = System.currentTimeMillis()
            val webSite = WEBSITE(random.nextInt(WEBSITE.length))
            // Bounded by ACTION.length so the last action ("Logout") can be chosen too
            val action = ACTION(random.nextInt(ACTION.length))
            j += 1
            // Assemble the tab-separated record
            val content = ip + "\t" + address + "\t" + date + "\t" + timestamp + "\t" + userid + "\t" + webSite + "\t" + action
            System.out.println(content)
            pw.write(content + "\n")
          }
          i += 1
        }
      } finally {
        // Closing the PrintWriter flushes it and closes the wrapped streams
        pw.close()
      }
    }
  }

  /**
   * Creates an empty file at the given path, deleting any existing file first.
   *
   * @param pathFileName path of the file to (re)create
   * @return true when the new empty file was created, false otherwise
   */
  def CreateFile(pathFileName: String): Boolean = {
    val file = new File(pathFileName)
    if (file.exists) file.delete()
    val createNewFile = file.createNewFile()
    // Report success only when the file was actually created
    if (createNewFile) System.out.println("create file " + pathFileName + " success!")
    else System.err.println("create file " + pathFileName + " failed!")
    createNewFile
  }
}

统计每个网站的PV、UV,以及每个网站的每个地区访问量,由大到小排序

 /**
  * Reads the tab-separated access log (fields: ip, address, date, timestamp,
  * userid, website, action) and prints three reports:
  * page views per website, unique visitors (distinct IPs) per website, and
  * the three most frequent regions per website. All are sorted from the
  * largest count to the smallest.
  *
  * @param args command-line arguments (unused)
  */
 def main(args: Array[String]): Unit = {
   val conf = new SparkConf()
   conf.setMaster("local")
   conf.setAppName("SparkPvAndUv")
   val sc = new SparkContext(conf)
   try {
     // Split every line once and drop lines too short to carry a website field
     val records: RDD[Array[String]] = sc.textFile("G:/idea/scala/spark02/data")
       .map(_.split("\t"))
       .filter(_.length > 5)
       .cache()

     println("*************PV******************")
     records.map(fields => (fields(5), 1))
       .reduceByKey(_ + _)
       .sortBy(_._2, ascending = false)
       .collect() // collect first: RDD.foreach does not guarantee the sorted order
       .foreach(println)

     println("*************UV******************")
     // (website, ip) pairs; a tuple key avoids the fragile join-with-"_"-then-split
     // round trip, and field 0 is the IP (field 1 is the region).
     records.map(fields => (fields(5), fields(0)))
       .distinct()
       .map { case (site, _) => (site, 1) }
       .reduceByKey(_ + _)
       .sortBy(_._2, ascending = false)
       .collect()
       .foreach(println)

     // Visits per (website, region), then the top three regions of each website
     val result: RDD[(String, List[(String, Int)])] = records
       .map(fields => ((fields(5), fields(1)), 1))
       .reduceByKey(_ + _)
       .map { case ((site, region), count) => (site, (region, count)) }
       .groupByKey()
       .map { case (site, regionCounts) =>
         // Negating the count turns the default ascending sort into a descending one
         (site, regionCounts.toList.sortBy(-_._2).take(3))
       }
     result.collect().foreach(println)
   } finally {
     // Always release the Spark context, even when a job fails
     sc.stop()
   }
 }

到此,关于"如何生成Java数据脚本"的学习就结束了,希望能够解决大家的疑惑。理论与实践的搭配能更好的帮助大家学习,快去试试吧!若想继续学习更多相关知识,请继续关注网站,小编会继续努力为大家带来更多实用的文章!

0