import requests
from bs4 import BeautifulSoup
import pprint
# Front page and second page of Hacker News.
url = 'https://news.ycombinator.com/'
url2 = 'https://news.ycombinator.com/news?p=2'

# requests applies no timeout by default; without one a stalled connection
# would block the script forever.
res = requests.get(url, timeout=10)
# Fail loudly on an HTTP error instead of parsing an error page into
# empty result lists.
res.raise_for_status()
res2 = requests.get(url2, timeout=10)
res2.raise_for_status()

soup = BeautifulSoup(res.text, 'html.parser')
soup2 = BeautifulSoup(res2.text, 'html.parser')

# Story title anchors and their matching subtext rows, one pair of lists
# per page.
# NOTE(review): the '.storylink' and '.subtext' selectors must match the
# site's current markup -- confirm they still select anything.
links = soup.select('.storylink')
subtext = soup.select('.subtext')
links2 = soup2.select('.storylink')
subtext2 = soup2.select('.subtext')
def sorting_votes(hnlist):
    """Return the stories in ``hnlist`` ordered by votes, highest first.

    Args:
        hnlist: Iterable of dicts, each holding a comparable ``'votes'``
            value.

    Returns:
        A new list; ``hnlist`` itself is left untouched. Entries with equal
        votes keep their original relative order.

    Raises:
        KeyError: If an entry has no ``'votes'`` key.
    """
    return sorted(hnlist, key=lambda story: story['votes'], reverse=True)
# Merge both pages into a single title list and a single subtext list,
# page one first, so positions in the two lists still correspond.
megalinks = [*links, *links2]
megasubtext = [*subtext, *subtext2]
def custom_hn(links,subtext):
    """Collect stories scoring more than 100 points, sorted by votes.

    Args:
        links: Sequence of story-title elements; each must provide
            ``getText()`` and ``get('href', None)``.
        subtext: Sequence of subtext elements paired with ``links`` by
            position; each must provide ``select('.score')``.

    Returns:
        The result of ``sorting_votes`` applied to a list of dicts with the
        keys ``'tittle'``, ``'link'`` and ``'votes'``.

    Raises:
        ValueError: If a score's text does not start with an integer.
    """
    hn = []
    # zip pairs each title with its subtext row and stops at the shorter
    # sequence, so a length mismatch cannot raise IndexError.
    for link, meta in zip(links, subtext):
        vote = meta.select('.score')
        if not vote:
            # No score element for this entry, so there is nothing to rank.
            continue
        # Take the leading number token. Stripping only ' points' (as this
        # code used to) leaves a singular "1 point" unparsed and makes
        # int() raise ValueError.
        points = int(vote[0].getText().split()[0])
        if points > 100:
            # NOTE: the key is spelled 'tittle' on purpose -- it is kept
            # unchanged so the shape of the returned dicts stays the same.
            hn.append({
                'tittle': link.getText(),
                'link': link.get('href', None),
                'votes': points,
            })
    return sorting_votes(hn)
# Pretty-print the filtered, vote-sorted stories from both pages.
pprint.pprint(
    custom_hn(links=megalinks, subtext=megasubtext)
)
# Advantages: you don't need to read through all the unnecessary links; you can
# filter them out and read only the popular ones, sorted by votes (highest first).
# Comments
# Post a Comment