JAVA 爬虫 抖音视频
程序员文章站
2022-04-11 15:33:45
...
代码还有很多不足,如果发现问题,请告知一声,万分感谢!
终态ID:这只是我自己取的名字,理解意思即可。
根据抖音用户的终态ID进入该用户的主页,进而下载其视频。终态ID获取方式:手机端-->用户主页-->分享名片-->链接分享,分享链接中 /share/user/ 后面的那串数字就是终态ID。
例:https://www.douyin.com/share/user/59021821479/?share_type=link 对应的终态ID为:59021821479
下方是代码
package dou_yin;
import java.util.ArrayList;
import java.util.List;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
/**
 * Collects the ids of the videos listed on a Douyin user's share page and downloads them.
 *
 * <p>The work is done by driving a Chrome browser through Selenium: the {@code data-id}
 * attribute of every video tile is read from the user page, each resulting share link is
 * submitted to the converter page at douyin.iiilab.com to obtain a download link, and the
 * download links are finally handed to {@link DownloadFile}.
 *
 * @author lenovo
 */
public class DownloadUrl {
    String URl_Id = null;
    int count = 0; // counter

    /**
     * Opens the given user page, resolves a download link for every video found on it and
     * downloads each one.
     *
     * @param url address of the user's share page
     * @throws InterruptedException if the thread is interrupted while waiting for a page to load
     */
    public void DownHtml(String url) throws InterruptedException {
        List<String> shareLinks = new ArrayList<>(); // one share link per video id
        List<String> mp4Links = new ArrayList<>();   // resolved download links

        // Use the hard-coded chromedriver location only as a fallback so that a
        // -Dwebdriver.chrome.driver=... given on the command line is respected.
        if (System.getProperty("webdriver.chrome.driver") == null) {
            System.setProperty("webdriver.chrome.driver", "E:/chromedriver/chromedriver_win32/chromedriver.exe");
        }

        WebDriver driver = new ChromeDriver();
        try {
            driver.get(url);
            Thread.sleep(8000); // wait for the page to load

            // NOTE(review): the comma makes this match "li.item" OR a <goWork> element;
            // "li.item.goWork" may have been intended - confirm against the page markup.
            List<WebElement> elements = driver.findElements(By.cssSelector("li.item,goWork"));
            System.out.println(elements.size());

            for (WebElement element : elements) {
                String id = element.getAttribute("data-id");
                if (id == null || id.isEmpty()) {
                    continue; // tile without a video id: nothing to download
                }
                shareLinks.add("https://www.iesdouyin.com/share/video/" + id);
            }

            if (!shareLinks.isEmpty()) {
                // This page turns a video share link into a link that can be downloaded.
                driver.get("http://douyin.iiilab.com/");
                Thread.sleep(8000); // wait for the page to load

                for (String shareLink : shareLinks) {
                    WebElement input = driver.findElement(By.cssSelector("input.form-control.link-input"));
                    input.clear();
                    input.sendKeys(shareLink);
                    driver.findElement(By.cssSelector("button.btn.btn-default")).click(); // start parsing
                    Thread.sleep(4000); // wait for the result to be rendered

                    String href = driver.findElement(By.cssSelector("a.btn.btn-success")).getAttribute("href");
                    if (href != null) {
                        mp4Links.add(href);
                    }
                }
            }
        } finally {
            // Always end the browser session, even when a lookup fails or the thread is
            // interrupted; quit() also shuts down the chromedriver process.
            driver.quit();
        }

        // Download every resolved link.
        for (String mp4Link : mp4Links) {
            DownloadFile df = new DownloadFile();
            df.run(mp4Link);
        }
    }

    /**
     * Entry point: downloads the videos of the user whose id is hard-coded below.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        DownloadUrl dl = new DownloadUrl();
        String ID = "80602533314"; // id of the user
        try {
            dl.DownHtml("https://www.douyin.com/share/user/" + ID + "/?share_type=link");
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt(); // keep the interrupt status visible to callers
            e.printStackTrace();
        }
    }
}
package dou_yin;
import java.io.ByteArrayOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
/**
 * Downloads a video from a direct download link and stores it as an {@code .mp4} file.
 *
 * @author lenovo
 */
public class DownloadFile {

    /**
     * Downloads the resource at {@code urlStr} and writes it to {@code savePath/fileName}.
     *
     * <p>The response is streamed straight to disk instead of being held in memory. The
     * target directory (including missing parents) is created when necessary. If the
     * transfer fails, the partially written file is removed.
     *
     * @param urlStr   address of the resource; must be an http(s) URL
     * @param fileName name of the file to create inside {@code savePath}
     * @param savePath directory the file is written to
     * @throws IOException if the URL is malformed, the connection or transfer fails, or the
     *                     directory or file cannot be created
     */
    public void downLoadFromUrl(String urlStr, String fileName, String savePath) throws IOException {
        URL url = new URL(urlStr);
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        conn.setConnectTimeout(3000);
        conn.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)");
        try (InputStream inputStream = conn.getInputStream()) {
            java.io.File saveDir = new java.io.File(savePath);
            // mkdirs() returns false when the directory already exists, hence the second check.
            if (!saveDir.mkdirs() && !saveDir.isDirectory()) {
                throw new IOException("Could not create directory: " + saveDir);
            }

            java.io.File file = new java.io.File(saveDir, fileName);
            boolean complete = false;
            try (FileOutputStream fos = new FileOutputStream(file)) {
                copy(inputStream, fos);
                complete = true;
            } finally {
                if (!complete) {
                    // Best effort: do not leave a truncated video behind.
                    file.delete();
                }
            }
        } finally {
            conn.disconnect();
        }
    }

    /**
     * Reads the stream until end-of-stream and returns everything that was read.
     * The stream is not closed by this method.
     *
     * @param inputStream stream to drain
     * @return all bytes read from the stream; empty if the stream was already at its end
     * @throws IOException if reading fails
     */
    public byte[] readInputStream(InputStream inputStream) throws IOException {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        copy(inputStream, bos);
        return bos.toByteArray();
    }

    /** Copies all remaining bytes from {@code in} to {@code out}; closes neither stream. */
    private static void copy(InputStream in, java.io.OutputStream out) throws IOException {
        byte[] buffer = new byte[8192];
        int len;
        while ((len = in.read(buffer)) != -1) {
            out.write(buffer, 0, len);
        }
    }

    /**
     * Downloads the video at {@code urlStr} into the hard-coded target directory, naming the
     * file after the current time in milliseconds. An I/O failure is printed, not rethrown.
     *
     * @param urlStr download link of the video
     */
    public void run(String urlStr) {
        long imageTitile = System.currentTimeMillis();
        String fileName = imageTitile + "." + "mp4";
        String savePath = "G:\\VidioVidioVidioVidioVidioVidio";
        try {
            downLoadFromUrl(urlStr, fileName, savePath);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}