PyQuery库的简单使用(Python)
程序员文章站
2022-05-02 18:07:24
...
import requests
from bs4 import BeautifulSoup
from pyquery import PyQuery as pq
if __name__ == '__main__':
    # PyQuery usage walkthrough.
    #
    # Every example below is intentionally left commented out because each one
    # fetches a live page (or reads a local file). Uncomment one example at a
    # time to try it.
    #
    # An `if` suite that contains only comments is a syntax error, so a no-op
    # statement is kept here while all examples are disabled.
    pass

    # Example 1: initialise from an HTML string (here: the <head> element
    # extracted with BeautifulSoup and converted back to a string).
    # url = 'https://book.douban.com'
    # response = requests.get(url).text
    # soup = BeautifulSoup(response, 'lxml')
    # doc = pq(str(soup.head))
    # print(doc('meta'))

    # Example 2: initialise directly from a URL.
    # doc = pq(url='https://book.douban.com')
    # print(doc('head'))

    # Example 3: initialise from a local file.
    # doc = pq(filename='test.html')
    # print(doc('meta'))

    # Basic CSS selectors.
    # doc = pq(url='https://book.douban.com')
    # print(doc('#db-global-nav .top-nav-info a'))

    # Descendants and children: find(selector) / children(selector).
    # doc = pq(url='https://book.douban.com')
    # items = doc('#db-global-nav .top-nav-info')
    # print(items)
    # print(items.find('a'))
    # print(items.children('.nav-login'))

    # Parent and all ancestors: parent(selector) / parents(selector).
    # doc = pq(url='https://book.douban.com')
    # items = doc('#db-global-nav')
    # print(items.parent())
    # print(items.parents())

    # Sibling elements.
    # doc = pq(url='https://book.douban.com')
    # items = doc('#db-global-nav .top-nav-info .nav-login')
    # print(items.siblings())

    # Iterating over several matches: items() yields one PyQuery object per
    # matched element (it is a generator, so it can be consumed only once).
    # doc = pq(url='https://book.douban.com')
    # lst = doc('meta').items()
    # for item in lst:
    #     print(item)

    # Reading attributes: attr('name') and attr.name are two spellings of the
    # same lookup.
    # doc = pq(url='https://book.douban.com')
    # a = doc('a').items()
    # for item in a:
    #     print(item.attr('href'))
    #     print(item.attr.href)

    # Reading text content: text().
    # doc = pq(url='https://book.douban.com')
    # a = doc('a').items()
    # for item in a:
    #     print(item.text())

    # Reading the inner HTML of an element: html().
    # doc = pq(url='https://book.douban.com')
    # a = doc('#db-global-nav a').items()
    # for item in a:
    #     print(item.html())

    # DOM manipulation: removing and adding classes.
    # The matches are materialised with list() because the generator returned
    # by items() would already be exhausted when the second loop starts.
    # doc = pq(url='https://book.douban.com')
    # a = list(doc('#db-global-nav .top-nav-info a').items())
    # for item in a:
    #     item.remove_class('nav-login')
    # for item in a:
    #     item.add_class('nav-login')
    #     print(item)

    # Setting attributes and inline styles.
    # attr()/css()/find()/text() are methods of the PyQuery selection itself,
    # so they are called on the selection rather than on items().
    # doc = pq(url='https://book.douban.com')
    # a = doc('#db-global-nav .top-nav-info a')
    # a.attr('name', 'link')
    # a.css('font-size', '14px')
    # # remove(): drop the matched descendants from the document.
    # a.find('p').remove()
    # print(a.text())

    # Commonly used pseudo-class selectors.
    # doc = pq(url='https://book.douban.com')
    # l1 = doc('li:first-child')       # <li> that is the first child of its parent
    # l2 = doc('li:last-child')        # <li> that is the last child of its parent
    # l3 = doc('li:nth-child(2)')      # <li> that is the 2nd child of its parent
    # l4 = doc('li:gt(2)')             # matched <li> elements with index greater than 2
    # l5 = doc('li:nth-child(2n)')     # <li> at even child positions
    # l6 = doc('li:contains(second)')  # <li> whose text contains "second"