千家信息网

Java爬虫之如何实现B站粉丝取关人排查

发表于:2024-10-27 作者:千家信息网编辑
千家信息网最后更新 2024年10月27日,这篇文章主要讲解了"Java爬虫之如何实现B站粉丝取关人排查",文中的讲解内容简单清晰,易于学习与理解,下面请大家跟着小编的思路慢慢深入,一起来研究和学习"Java爬虫之如何实现B站粉丝取关人排查"吧
千家信息网最后更新 2024年10月27日Java爬虫之如何实现B站粉丝取关人排查

这篇文章主要讲解了"Java爬虫之如何实现B站粉丝取关人排查",文中的讲解内容简单清晰,易于学习与理解,下面请大家跟着小编的思路慢慢深入,一起来研究和学习"Java爬虫之如何实现B站粉丝取关人排查"吧!

1.爬虫开发准备

开发工具:Eclipse/IDEA

浏览器:Google Chrome

浏览器Selenium驱动:Selenium 3.5

Jar包:

// Selenium驱动版本需要和Chrome浏览器版本对应

2.流程

  1. 获取Cookie(终端输入或者使用Selenium打开扫码登录)

  2. 请求https://api.bilibili.com/x/relation/followers接口

  3. 解析数据

  4. 存入csv

3.编码

package com.mm.rep;import java.io.IOException;import java.nio.charset.Charset;import java.util.List;import java.util.Scanner;import java.util.Set;import org.openqa.selenium.Cookie;import org.apache.commons.httpclient.HttpClient;import org.apache.commons.httpclient.HttpException;import org.apache.commons.httpclient.URI;import org.apache.commons.httpclient.methods.GetMethod;import org.apache.commons.httpclient.params.HttpMethodParams;import org.apache.log4j.BasicConfigurator;import org.apache.log4j.LogManager;import org.apache.log4j.Logger;import org.openqa.selenium.WebDriver;import org.openqa.selenium.chrome.ChromeDriver;import org.openqa.selenium.chrome.ChromeOptions;import com.alibaba.fastjson.JSON;import com.alibaba.fastjson.JSONArray;import com.alibaba.fastjson.JSONObject;import com.csvreader.CsvWriter;import org.apache.commons.lang3.StringUtils;public class Main {        private static final Logger logger = LogManager.getLogger(Main.class);                private static WebDriver driver = null;        private static GetMethod getMethod = null;        private static Set bcookies = null;        private final static String BLOGINURL = "https://passport.bilibili.com/login";        private final static String BMAINPAGE = "https://www.bilibili.com/";                Main(){                BasicConfigurator.configure();                // 初始化GetMethod,设置不变的RequestHeader                getMethod = new GetMethod();                getMethod.getParams().setParameter(HttpMethodParams.HTTP_CONTENT_CHARSET, "UTF-8");                getMethod.addRequestHeader(":authority", "api.bilibili.com");                getMethod.addRequestHeader(":method", "api.bilibili.com");                getMethod.addRequestHeader(":scheme", "GET");                getMethod.addRequestHeader(":scheme", "https");                getMethod.addRequestHeader("accept", "*/*");                getMethod.addRequestHeader(":scheme", "https");                getMethod.addRequestHeader("accept-language", 
"zh-CN,zh;q=0.9");                getMethod.addRequestHeader("sec-fetch-dest", "script");                getMethod.addRequestHeader("sec-fetch-mode", "no-cors");                getMethod.addRequestHeader("sec-fetch-site", "same-site");                getMethod.addRequestHeader("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36");        }                        public static String getCookie() throws InterruptedException {                                String scCookie = null;        Scanner ip = new Scanner(System.in);        logger.info("请输入Cookie,如果没有请按回车:");        scCookie = ip.nextLine();                if(scCookie.length() != 0) {                return scCookie;        }                logger.info("开始扫码登录");                // 设置驱动地址                System.setProperty("webdriver.chrome.driver", "H:/chromedriver/chromedriver.exe");                // 启动设置                ChromeOptions options = new ChromeOptions();                // 创建ChromeDriver                driver = new ChromeDriver(options);                // 打开Bilibili登录页面                driver.get(BLOGINURL);                // 等待扫码登录                while(true) {                        if(driver.getCurrentUrl().equals(BMAINPAGE)) {                                break;                        }else {                                Thread.sleep(100);                        }                }                                                logger.info("扫码登录成功");                                                //获取cookie                bcookies = driver.manage().getCookies();                String cookie = StringUtils.join(bcookies, "; ");                                return cookie;        }                public static List getFanS(String cookie,String vmid,int pn,int ps) throws InterruptedException, HttpException, IOException {                                HttpClient client = new HttpClient();                 // 拼接url            
    StringBuffer sBuffer = new StringBuffer();                sBuffer.append("https://api.bilibili.com/x/relation/followers?vmid=");                sBuffer.append(vmid);                sBuffer.append("&pn=");                sBuffer.append(pn);                sBuffer.append("&ps=");                sBuffer.append(ps);                sBuffer.append("&order=desc&jsonp=jsonp");                getMethod.setURI(new URI(sBuffer.toString(), true));                getMethod.getParams().setParameter(HttpMethodParams.HTTP_CONTENT_CHARSET, "UTF-8");                // 设置请求头                getMethod.addRequestHeader("cookie", cookie);                // 发送请求                client.executeMethod(getMethod);                // 获取数据                String info = new String(getMethod.getResponseBody(), "UTF-8");                JSONObject fans = JSONObject.parseObject(info).getJSONObject("data");                JSONArray fArray = JSONArray.parseArray(fans.getString("list"));                                return JSON.parseArray(fArray.toJSONString(), JSONObject.class);        }                public static void main(String[] args) throws InterruptedException, HttpException{                logger.info("程序开始...");                new Main();                // 获取Cookie                String cookie = Main.getCookie();                                                CsvWriter csvWriter = new CsvWriter("C:\\Users\\computer\\Desktop\\aaa.csv", ',', Charset.forName("UTF-8"));                String[] csvHeaders = { "mid", "粉丝名字","粉丝签名","粉丝头像"};                        try {                                csvWriter.writeRecord(csvHeaders);                                                int pn = 1;                        boolean end = false;                        while(true) {                                for (JSONObject f : Main.getFanS(cookie, "309103931", pn, 20)) {                                        if(f == null) {                                                end = true;                    
                            break;                                        }                                        String[] csvContent1 = {f.getString("mid"), f.getString("uname"),f.getString("sign"),f.getString("face")};                                        System.out.println(csvContent1);                                        csvWriter.writeRecord(csvContent1);                                }                                pn++;                                Thread.sleep(100);                                if(end == true) {                                        break;                                }                        }                } catch (IOException e) {                        System.out.println(e);                        e.printStackTrace();                }                csvWriter.close();                driver.close();                logger.info("程序结束");        }                }

4.缺点

容易被拦截,最多只能获取到不足1000个粉丝

感谢各位的阅读,以上就是"Java爬虫之如何实现B站粉丝取关人排查"的内容了,经过本文的学习后,相信大家对Java爬虫之如何实现B站粉丝取关人排查这一问题有了更深刻的体会,具体使用情况还需要大家实践验证。这里是千家信息网,小编将为大家推送更多相关知识点的文章,欢迎关注!

0