获取微信公众号用户信息
程序员文章站
2022-07-12 11:20:30
...
1、使用 Java 获取微信公众号用户信息
① 简介
- 微信订阅号似乎没有提供获取用户 data-id 及其对应昵称的接口，所以只能自己写一个 selenium 脚本，从公众号后台的用户管理页面逐页抓取
② 准备事项
- jdk 环境
- IDE 编辑器
- java 的 selenium jar 包
- Google Chrome 浏览器，以及与浏览器版本相匹配的 ChromeDriver 驱动
③ 脚本代码
package com.jeffrey.manager;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.support.ui.ExpectedCondition;
import org.openqa.selenium.support.ui.WebDriverWait;
import java.util.*;
/**
 * Scrapes every user's {@code data-id} and the matching nickname from the user-management
 * pages of the WeChat Official Account admin site (https://mp.weixin.qq.com/), driving a
 * Chrome browser through Selenium.
 *
 * <p>The operator logs in by scanning the QR code; the script then walks the paginated user
 * list, collects the pairs into {@link #USER_DATA_MAP}, and prints them once the number of
 * collected entries equals the total shown by the page.
 *
 * <p>Created 2021/8/15.
 *
 * @author jeffrey
 * @version 1.0
 * @since JDK 1.8
 */
public class GetUserDataId {

    /** Collected users: the key is a user's data-id, the value is the nickname shown for it. */
    private static final Map<String, String> USER_DATA_MAP = new HashMap<>();

    /** Path of the chromedriver executable handed to Selenium. */
    private static final String CHROMEDRIVER_DRIVER = "src/com/jeffrey/utils/chromedriver";

    /** Pause between two page operations, in milliseconds. */
    private static final int SPEED = 2000;

    /** Consecutive failed attempts tolerated on a single page before the run is aborted. */
    private static final int MAX_RETRIES = 5;

    /** Link that opens the user-management page; it only exists once the login has succeeded. */
    private static final String USER_MENU_XPATH = "//*[@id=\"app\"]/div[2]/div[2]/div[2]/ul[1]/li[3]/em/a";

    /** Element whose text is the total user count wrapped in parentheses. */
    private static final String TOTAL_COUNT_CSS = "#groupsList > dl:nth-child(1) > dt > a > em";

    /**
     * "Next page" link. A full XPath is used on purpose: the #wxPagebar_ attribute differs on
     * every page, so CSS and class-name selectors are not stable here.
     */
    private static final String NEXT_PAGE_XPATH = "/html/body/div[2]/div/div[3]/div/div/div[4]/div[2]/div[1]/div/div[3]/div/div/span[1]/a[3]";

    /**
     * Entry point: starts Chrome, waits for the QR-code login, scrapes all pages and prints the
     * result. The browser session is always shut down, whether the run succeeds or fails.
     *
     * @param args unused
     * @throws RuntimeException if the number of collected users differs from the total shown by
     *                          the page, if a page keeps failing after {@link #MAX_RETRIES}
     *                          attempts, or if the thread is interrupted while pausing
     */
    public static void main(String[] args) {
        System.setProperty("webdriver.chrome.driver", CHROMEDRIVER_DRIVER);
        WebDriver driver = new ChromeDriver();
        try {
            driver.get("https://mp.weixin.qq.com/");
            WebDriverWait wait = new WebDriverWait(driver, 100);
            System.out.println("等待扫码登录");
            waitFor(wait, By.xpath(USER_MENU_XPATH)).click();

            // Total number of users, used at the end to validate USER_DATA_MAP. The element is
            // waited for because the page may still be loading right after the click above.
            String rawCount = waitFor(wait, By.cssSelector(TOTAL_COUNT_CSS)).getText();
            int allUserCount = Integer.parseInt(rawCount.replace("(", "").replace(")", "").trim());

            collectAllPages(driver, wait, allUserCount);
        } finally {
            // quit() rather than close(): per the Selenium docs it ends the whole session, so no
            // browser or chromedriver process is left behind when the run fails.
            driver.quit();
        }
    }

    /**
     * Walks the paginated user list until the last page, filling {@link #USER_DATA_MAP}, then
     * prints every collected entry.
     *
     * @param driver       browser session positioned on the user-management page
     * @param wait         wait helper bound to {@code driver}
     * @param allUserCount total number of users reported by the page
     */
    private static void collectAllPages(WebDriver driver, WebDriverWait wait, int allUserCount) {
        int failures = 0;
        while (true) {
            pause();
            int currentPage;
            int lastPage;
            try {
                scrapeCurrentPage(driver);
                // The pager text has the form "current / total".
                String[] pn = waitFor(wait, By.className("page_num")).getText().replace(" ", "").split("/");
                currentPage = Integer.parseInt(pn[0]);
                lastPage = Integer.parseInt(pn[1]);
                failures = 0;
            } catch (RuntimeException e) {
                // A page that is still rendering can fail transiently, so retry — but only a
                // bounded number of times, otherwise a permanent failure would loop forever.
                failures++;
                if (failures >= MAX_RETRIES) {
                    throw new IllegalStateException("连续 " + failures + " 次读取页面失败", e);
                }
                e.printStackTrace();
                continue;
            } finally {
                System.out.println("已获取 " + USER_DATA_MAP.size() + " 个用户信息,共 " + allUserCount + " 位用户");
            }

            // Not on the last page yet: move on to the next one.
            if (currentPage != lastPage) {
                waitFor(wait, By.xpath(NEXT_PAGE_XPATH)).click();
                continue;
            }

            if (allUserCount != USER_DATA_MAP.size()) {
                throw new RuntimeException("获取到的用户信息条目数与总条目数不符");
            }
            System.out.println("用户总数" + USER_DATA_MAP.size());
            for (Map.Entry<String, String> bean : USER_DATA_MAP.entrySet()) {
                System.out.println("用户 id:" + bean.getKey() + "\n用户昵称:" + bean.getValue() + "\n----------\n");
            }
            return;
        }
    }

    /**
     * Adds the data-id/nickname pairs visible on the current page to {@link #USER_DATA_MAP}.
     *
     * @param driver browser session showing one page of the user list
     * @throws IllegalStateException if the page exposes a different number of avatar and
     *                               nickname elements, so they cannot be paired by position
     */
    private static void scrapeCurrentPage(WebDriver driver) {
        List<WebElement> idEle = driver.findElements(By.className("js_msgSenderAvatar"));
        List<WebElement> nameEle = driver.findElements(By.className("remark_name"));
        if (idEle.size() != nameEle.size()) {
            throw new IllegalStateException("头像元素数 " + idEle.size() + " 与昵称元素数 " + nameEle.size() + " 不一致");
        }
        for (int i = 0; i < idEle.size(); i++) {
            USER_DATA_MAP.put(idEle.get(i).getAttribute("data-id"), nameEle.get(i).getText());
        }
    }

    /**
     * Blocks until the element addressed by {@code locator} can be found, or the wait times out.
     *
     * @param wait    wait helper bound to the browser session
     * @param locator how to find the element
     * @return the located element
     */
    private static WebElement waitFor(WebDriverWait wait, final By locator) {
        // NOTE(review): relies on WebDriverWait ignoring "not found" errors while polling, as
        // the original inline conditions did — confirm against the Selenium version in use.
        return wait.until(new ExpectedCondition<WebElement>() {
            @Override
            public WebElement apply(WebDriver webDriver) {
                return webDriver.findElement(locator);
            }
        });
    }

    /**
     * Sleeps for {@link #SPEED} milliseconds between page operations.
     *
     * @throws IllegalStateException if the thread is interrupted; the interrupt flag is restored
     */
    private static void pause() {
        try {
            Thread.sleep(SPEED);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new IllegalStateException("等待过程被中断", e);
        }
    }
}