# Download every post linked from the blog's front page and save the body of
# each post to "<post title>.html" in the current directory.
import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

BLOG_URL = 'https://wordpress-edu-3autumn.localprod.forc.work/'
# requests has no default timeout; without one a stalled server hangs the script.
REQUEST_TIMEOUT = 10  # seconds

# Characters that are rejected in file names by at least one common filesystem.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def _fetch_html(url):
    """Return the response body of *url* as text.

    Raises:
        requests.RequestException: on connection failure, timeout, or an
            HTTP error status.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.text


def _safe_filename(title):
    """Return *title* with characters that are unsafe in file names replaced."""
    cleaned = _UNSAFE_FILENAME_CHARS.sub('_', title).strip().rstrip('.')
    return cleaned or 'untitled'


def save_posts(index_url=BLOG_URL):
    """Save the content of every post listed on *index_url* as an HTML file.

    Each ``<h2 class="entry-title">`` on the index page is expected to hold a
    link to a post. The post's ``<div class="entry-content">`` is written to
    ``<title>.html``, and the title is printed as progress output.

    Args:
        index_url: Page that lists the posts.

    Raises:
        requests.RequestException: if the index page or a post cannot be
            fetched.
    """
    index_page = BeautifulSoup(_fetch_html(index_url), 'html.parser')
    for heading in index_page.find_all('h2', class_='entry-title'):
        link = heading.find('a')
        if link is None or not link.get('href'):
            continue  # a heading without a link has nothing to download

        title = link.text
        print(title)

        # Fetch and parse before opening the output file so that a failed
        # request does not leave an empty file behind.
        # The 'lxml' parser is kept from the original script; it needs the
        # lxml package to be installed.
        post_url = urljoin(index_url, link['href'])
        post_page = BeautifulSoup(_fetch_html(post_url), 'lxml')
        content = post_page.find('div', class_='entry-content')
        if content is None:
            # Previously this case wrote the literal text "None" to the file.
            print('  skipped: no entry-content block found')
            continue

        filename = '{}.html'.format(_safe_filename(title))
        with open(filename, 'w', encoding='utf8') as file:
            file.write(str(content))


if __name__ == '__main__':
    save_posts()
作业2: 爬取 http://books.toscrape.com/ 各分类下的图书名和对应价格, 保存到 books.txt
最终效果...
# Assignment 2: write every category from the books.toscrape.com sidebar to
# books.txt, each followed by the title and price of the books on its page.
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

SITE_URL = 'http://books.toscrape.com/'
OUTPUT_PATH = 'books.txt'
# requests has no default timeout; without one a stalled server hangs the script.
REQUEST_TIMEOUT = 10  # seconds
# Value of the class attribute on each book's <li> in a category listing.
BOOK_ITEM_CLASS = 'col-xs-6 col-sm-4 col-md-3 col-lg-3'


def _fetch_soup(url):
    """Fetch *url* and return it parsed with the built-in HTML parser.

    Raises:
        requests.RequestException: on connection failure, timeout, or an
            HTTP error status.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # The original script forced UTF-8 on category pages only; apply it to
    # every page so all text is decoded the same way.
    # NOTE(review): presumably needed so the price symbol decodes correctly.
    response.encoding = 'utf8'
    return BeautifulSoup(response.text, 'html.parser')


def save_books(site_url=SITE_URL, output_path=OUTPUT_PATH):
    """Write each category and its books' titles and prices to *output_path*.

    The output has one line per category name, followed by one tab-indented
    ``"<title>" <price>`` line per book. Each book title is also printed as
    progress output.

    NOTE(review): only the page linked from the sidebar is fetched for each
    category; if a category listing is paginated, later pages are not visited.

    Args:
        site_url: Home page whose sidebar lists the categories.
        output_path: Text file to create or overwrite.

    Raises:
        RuntimeError: if the category list is not found on the home page.
        requests.RequestException: if a page cannot be fetched.
    """
    home_page = _fetch_soup(site_url)
    sidebar = home_page.find('ul', class_='nav nav-list')
    category_list = sidebar.find('ul') if sidebar is not None else None
    if category_list is None:
        raise RuntimeError('category list not found on {}'.format(site_url))

    with open(output_path, 'w', encoding='utf8') as file:
        for category in category_list.find_all('li'):
            link = category.find('a')
            if link is None or not link.get('href'):
                continue  # no listing page to follow for this entry

            file.write(category.text.strip() + '\n')

            # Resolve the href against the site root rather than
            # concatenating strings, so absolute or "../" links also work.
            listing = _fetch_soup(urljoin(site_url, link['href']))
            for item in listing.find_all('li', class_=BOOK_ITEM_CLASS):
                heading = item.find('h3')
                title_link = heading.find('a') if heading is not None else None
                price = item.find('p', class_='price_color')
                title = title_link.get('title') if title_link is not None else None
                if title is None or price is None:
                    continue  # skip entries that are not complete book records

                print(title)
                file.write('\t"{}" {}\n'.format(title, price.text))


if __name__ == '__main__':
    save_books()
Travel
    "It's Only the Himalayas" £45.17
    "Full Moon over Noah’s Ark: An Odyssey to Mount Ararat and Beyond" £49.43
    "See America: A Celebration of Our National Parks & Treasured Sites" £48.87
    "Vagabonding: An Uncommon Guide to the Art of Long-Term World Travel" £36.94
    "Under the Tuscan Sun" £37.33
    "A Summer In Europe" £44.34
    "The Great Railway Bazaar" £30.54
    "A Year in Provence (Provence #1)" £56.88
    "The Road to Little Dribbling: Adventures of an American in Britain (Notes From a Small Island #2)" £23.21
    "Neither Here nor There: Travels in Europe" £38.95
    "1,000 Places to See Before You Die" £26.08
Mystery
    "Sharp Objects" £47.82
    "In a Dark, Dark Wood" £19.63
    "The Past Never Ends" £56.50
    "A Murder in Time" £16.64
    "The Murder of Roger Ackroyd (Hercule Poirot #4)" £44.10
    "The Last Mile (Amos Decker #2)" £54.21
    "That Darkness (Gardiner and Renner #1)" £13.92
    "Tastes Like Fear (DI Marnie Rome #3)" £10.69
    "A Time of Torment (Charlie Parker #14)" £48.35
    "A Study in Scarlet (Sherlock Holmes #1)" £16.73
    "Poisonous (Max Revere Novels #3)" £26.80
    "Murder at the 42nd Street Library (Raymond Ambler #1)" £54.36
    "Most Wanted" £35.28
    "Hide Away (Eve Duncan #20)" £11.84
    "Boar Island (Anna Pigeon #19)" £59.48
    "The Widow" £27.26
    "Playing with Fire" £13.71
    "What Happened on Beale Street (Secrets of the South Mysteries #2)" £25.37
    "The Bachelor Girl's Guide to Murder (Herringford and Watts Mysteries #1)" £52.30
    "Delivering the Truth (Quaker Midwife Mystery #1)" £20.89
Historical Fiction
    "Tipping the Velvet" £53.74
    "Forever and Forever: The Courtship of Henry Longfellow and Fanny Appleton" £29.69
    "A Flight of Arrows (The Pathfinders #2)" £55.53
    "The House by the Lake" £36.95
    "Mrs. Houdini" £30.25
    "The Marriage of Opposites" £28.08
    "Glory over Everything: Beyond The Kitchen House" £45.84
    "Love, Lies and Spies" £20.55
    "A Paris Apartment" £39.01
    "Lilac Girls" £17.28
    "The Constant Princess (The Tudor Court #1)" £16.62
    "The Invention of Wings" £37.34
    "World Without End (The Pillars of the Earth #2)" £32.97
    "The Passion of Dolssa" £28.32
    "Girl With a Pearl Earring" £26.77
    "Voyager (Outlander #3)" £21.07
    "The Red Tent" £35.66
    "The Last Painting of Sara de Vos" £55.55
    "The Guernsey Literary and Potato Peel Pie Society" £49.53
    "Girl in the Blue Coat" £46.83
......