initial import
This commit is contained in:
127
web.py
Normal file
127
web.py
Normal file
@@ -0,0 +1,127 @@
|
||||
from db import *
|
||||
from learn import *
|
||||
import datetime
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
import bottle
|
||||
from threading import Thread
|
||||
import schedule
|
||||
import time
|
||||
|
||||
def grab_all_data():
    """Collect every stored training example.

    Returns:
        A ``(names, states)`` tuple of two parallel lists holding the
        ``name`` and ``state`` values of each ``TrainedItem`` row, in the
        order the query yields them.
    """
    session = Session()
    rows = list(session.query(TrainedItem.name, TrainedItem.state))
    names = [name for name, _ in rows]
    states = [state for _, state in rows]
    return (names, states)
|
||||
|
||||
def import_data():
    """Store the samples loaded from the ``data`` directory as training rows.

    Calls ``load_files('data', shuffle=True)`` and adds one ``TrainedItem``
    per sample: ``name`` is the sample decoded as UTF-8 and ``state`` is the
    sample's target coerced to ``bool``. Everything is committed in a single
    transaction after all rows have been queued.
    """
    session = Session()
    trained = load_files('data', shuffle=True)

    # Walk samples and their targets in lockstep instead of indexing both.
    for raw_text, target in zip(trained.data, trained.target):
        session.add(TrainedItem(name=raw_text.decode('utf8'),
                                state=bool(target)))

    session.commit()
|
||||
|
||||
# import_data()  # disabled: uncomment to load the samples under 'data' as TrainedItem rows
|
||||
# Module-level classifier shared by the route handlers in this file.
# It is handed the grab_all_data function itself (not its result);
# presumably Classifier calls it to obtain its training data -- confirm
# against the Classifier definition pulled in by the star imports above.
clf = Classifier(grab_all_data)
|
||||
|
||||
def _parse_comment_count(link_text):
    """Return the comment count shown by a Hacker News item link, or 0.

    Args:
        link_text: Unicode label of the link, e.g. a number, a non-breaking
            space and then "comments" (or the singular "comment").

    Returns:
        The leading number as an ``int``. Labels that do not carry a count
        (for example "discuss" or an age such as "2 hours ago") give 0 and
        are echoed with ``repr`` so unexpected markup is easy to spot.
    """
    number, _, label = link_text.partition(u"\xa0")
    # Accept the singular label too; otherwise a story with exactly one
    # comment would be stored with a count of 0.
    if label in (u"", u"comment", u"comments"):
        try:
            return int(number)
        except ValueError:
            pass
    print(repr(link_text))
    return 0


@bottle.route("/update")
def updateHN():
    """Scrape the Hacker News front page and insert or refresh its stories.

    For every title cell that contains a ``storylink`` anchor, the story URL,
    the row id and the comment count are extracted. A ``FoundItem`` with the
    same URL gets its ``hnid`` and ``comment_count`` refreshed; otherwise a
    new ``FoundItem`` is added, rated with ``clf.scan`` on its title text.
    All changes are committed once after the page has been processed.

    Exposed as the ``/update`` route and also registered with ``schedule``
    in the ``__main__`` block of this file.
    """
    print("Updating HN...")
    session = Session()
    # A timeout keeps a stalled connection from blocking the caller forever.
    resp = requests.get("https://news.ycombinator.com/", timeout=30)
    soup = BeautifulSoup(resp.text, "lxml")

    for title_cell in soup.find_all('td', align=None, class_='title'):
        link = title_cell.find('a', class_="storylink")
        if link is None:
            # Title cells without a story link carry nothing to store.
            continue

        url = link["href"]
        title_row = title_cell.parent
        hnid = title_row["id"]

        # The row after the title row holds the links back to the item page;
        # the last of them is used as the comments link. Either may be
        # missing, which must not abort the whole refresh.
        detail_row = title_row.next_sibling
        item_links = []
        if detail_row is not None:
            item_links = detail_row.find_all('a', href="item?id=" + hnid)
        if item_links:
            comment_count = _parse_comment_count(unicode(item_links[-1].text))
        else:
            comment_count = 0

        check = session.query(FoundItem).filter(FoundItem.url == url).one_or_none()
        if check is None:
            print(url)
            session.add(FoundItem(name=title_cell.text,
                                  hnid=hnid,
                                  comment_count=comment_count,
                                  url=url,
                                  date=datetime.datetime.now(),
                                  rating=clf.scan(unicode(title_cell.text))))
        else:
            check.hnid = hnid
            check.comment_count = comment_count

    session.commit()
|
||||
|
||||
class SchedThread(Thread):
    """Thread that keeps running the jobs registered with ``schedule``."""

    def __init__(self):
        Thread.__init__(self)

    def run(self):
        """Call ``schedule.run_pending()`` once per second, without end.

        An exception raised by a job is printed and the loop carries on;
        otherwise a single failing run (for example a network error) would
        end this thread and silently stop every later run.
        """
        import traceback  # local import: only this method needs it

        while True:
            try:
                schedule.run_pending()
            except Exception:
                traceback.print_exc()
            time.sleep(1)
|
||||
|
||||
@bottle.route('/')
@bottle.view('index.tpl')
def news():
    """Build the template context for the front page.

    Request parameters:
        all:   when equal to "true", items with a rating of 0 or less are
               included; otherwise only items rated above 0 are kept.
        limit: "week" or "day" restricts the items to those dated within the
               last 7 days or 1 day; any other value returns the 100 most
               recently dated items.

    Returns:
        A dict with ``items`` (the query over ``FoundItem``) and ``sortby``
        (2 for the week/day views, 1 otherwise).
    """
    session = Session()
    query = session.query(FoundItem)
    params = bottle.request.params

    if params.get("all") != "true":
        query = query.filter(FoundItem.rating > 0)

    # Number of days to look back for each recognised "limit" value.
    window_days = {"week": 7, "day": 1}.get(params.get("limit"))

    if window_days is None:
        sort_column = 1
        query = query.order_by(FoundItem.date.desc()).limit(100)
    else:
        sort_column = 2
        cutoff = datetime.datetime.now() - datetime.timedelta(days=window_days)
        query = query.filter(FoundItem.date > cutoff)

    return dict(items=query, sortby=sort_column)
|
||||
|
||||
@bottle.route('/rate/<id:int>')
def rate(id):
    """Record a verdict for one found item and re-rate everything.

    The ``rating`` request parameter counts as positive only when it equals
    "good"; any other value, or no value, is stored as negative.

    Args:
        id: Value matched against ``FoundItem.id``, taken from the URL.

    Responds with HTTP 404 when no ``FoundItem`` has that id. Otherwise the
    verdict is merged in as a ``TrainedItem``, passed to ``clf.add``, every
    ``FoundItem`` is re-scored with ``clf.scan``, and the client is
    redirected to "/".
    """
    session = Session()
    rating = bottle.request.params.get('rating') == "good"

    rated = session.query(FoundItem).filter(FoundItem.id == id).one_or_none()
    if rated is None:
        # An unknown id is a client error, not a server failure.
        bottle.abort(404, "No such item: %d" % id)

    # insert or update
    session.merge(TrainedItem(name=rated.name, state=rating))
    session.commit()
    clf.add(rated.name, rating)

    # re-rate all items in DB
    for found in session.query(FoundItem):
        found.rating = clf.scan(found.name)
    session.commit()

    bottle.redirect("/")
|
||||
|
||||
if __name__ == "__main__":
    # Register the front-page refresh to run every ten minutes.
    schedule.every(10).minutes.do(updateHN)

    # The scheduler loop runs on its own thread, started before the web
    # server takes over the main thread.
    scheduler_thread = SchedThread()
    # NOTE(review): left disabled as in the original; while it stays off the
    # thread is not a daemon thread.
    #scheduler_thread.daemon = True
    scheduler_thread.start()

    bottle.run(host="0.0.0.0", port=55512)
|
||||
Reference in New Issue
Block a user