# Scraping exercises: fetch one HTML page and print selected pieces of it
# using requests + BeautifulSoup.
import requests
from bs4 import BeautifulSoup

# 1. Extract the text of the h1 tag.
# NOTE(review): the URL points at a local server and carries an `_ijt` query
# token; this looks session-specific and probably needs refreshing before
# each run -- confirm.
newsurl = 'http://localhost:63342/bd/cgc.html?_ijt=r6216qes962k0f6q6ufku7ajkc'
# A timeout keeps the script from hanging forever if the server is down.
res = requests.get(newsurl, timeout=10)  # returns a Response object
# Fail loudly on 4xx/5xx instead of parsing an error page.
res.raise_for_status()
# Force UTF-8 decoding of the body before reading res.text.
res.encoding = 'utf-8'
soup = BeautifulSoup(res.text, 'html.parser')
print(soup.h1.text)

# 2. Extract the link (href) of the first a tag.
print(soup.select('a')[0].attrs['href'])

# 3. Extract all contents of every li tag.
# `soup.li` would only reach the first <li>, so walk all of them.
for li_tag in soup.find_all('li'):
    print(li_tag.contents)

# 4. Extract the attributes of the 3rd div inside the a tag of the 2nd li.
print(soup.find_all('li')[1].a.find_all('div')[2].attrs)

# 5. Extract the title, link, publish time and source of one news item.
print(soup.select('div .news-list-title')[0].text)
# The link is read from the parent element of the thumbnail; .get() yields
# None rather than raising if that parent has no href attribute.
print(soup.select('div .news-list-thumb')[0].parent.attrs.get('href'))
# Run the selector once; presumably span 0 is the publish time and span 1 the
# source, following the order given in the exercise -- verify against the page.
info_spans = soup.select('div .news-list-info > span')
print(info_spans[0].text)
print(info_spans[1].text)