initial import
This commit is contained in:
68
alembic.ini
Normal file
68
alembic.ini
Normal file
@@ -0,0 +1,68 @@
|
||||
# A generic, single database configuration.
|
||||
|
||||
[alembic]
|
||||
# path to migration scripts
|
||||
script_location = alembic
|
||||
|
||||
# template used to generate migration files
|
||||
# file_template = %%(rev)s_%%(slug)s
|
||||
|
||||
# max length of characters to apply to the
|
||||
# "slug" field
|
||||
#truncate_slug_length = 40
|
||||
|
||||
# set to 'true' to run the environment during
|
||||
# the 'revision' command, regardless of autogenerate
|
||||
# revision_environment = false
|
||||
|
||||
# set to 'true' to allow .pyc and .pyo files without
|
||||
# a source .py file to be detected as revisions in the
|
||||
# versions/ directory
|
||||
# sourceless = false
|
||||
|
||||
# version location specification; this defaults
|
||||
# to alembic/versions. When using multiple version
|
||||
# directories, initial revisions must be specified with --version-path
|
||||
# version_locations = %(here)s/bar %(here)s/bat alembic/versions
|
||||
|
||||
# the output encoding used when revision files
|
||||
# are written from script.py.mako
|
||||
# output_encoding = utf-8
|
||||
|
||||
sqlalchemy.url = sqlite:///hnlearn.db
|
||||
|
||||
|
||||
# Logging configuration
|
||||
[loggers]
|
||||
keys = root,sqlalchemy,alembic
|
||||
|
||||
[handlers]
|
||||
keys = console
|
||||
|
||||
[formatters]
|
||||
keys = generic
|
||||
|
||||
[logger_root]
|
||||
level = WARN
|
||||
handlers = console
|
||||
qualname =
|
||||
|
||||
[logger_sqlalchemy]
|
||||
level = WARN
|
||||
handlers =
|
||||
qualname = sqlalchemy.engine
|
||||
|
||||
[logger_alembic]
|
||||
level = INFO
|
||||
handlers =
|
||||
qualname = alembic
|
||||
|
||||
[handler_console]
|
||||
class = StreamHandler
|
||||
args = (sys.stderr,)
|
||||
level = NOTSET
|
||||
formatter = generic
|
||||
|
||||
[formatter_generic]
|
||||
format = %(levelname)-5.5s [%(name)s] %(message)s
|
||||
datefmt = %H:%M:%S
|
||||
1
alembic/README
Normal file
1
alembic/README
Normal file
@@ -0,0 +1 @@
|
||||
Generic single-database configuration.
|
||||
73
alembic/env.py
Normal file
73
alembic/env.py
Normal file
@@ -0,0 +1,73 @@
|
||||
from __future__ import with_statement
|
||||
from alembic import context
|
||||
from sqlalchemy import engine_from_config, pool
|
||||
from logging.config import fileConfig
|
||||
|
||||
# this is the Alembic Config object, which provides
|
||||
# access to the values within the .ini file in use.
|
||||
config = context.config
|
||||
|
||||
# Interpret the config file for Python logging.
|
||||
# This line sets up loggers basically.
|
||||
fileConfig(config.config_file_name)
|
||||
|
||||
# add your model's MetaData object here
|
||||
# for 'autogenerate' support
|
||||
# from myapp import mymodel
|
||||
# target_metadata = mymodel.Base.metadata
|
||||
import sys
|
||||
sys.path.append(".")
|
||||
from db import Base
|
||||
target_metadata = Base.metadata
|
||||
|
||||
# other values from the config, defined by the needs of env.py,
|
||||
# can be acquired:
|
||||
# my_important_option = config.get_main_option("my_important_option")
|
||||
# ... etc.
|
||||
|
||||
|
||||
def run_migrations_offline():
    """Run migrations in 'offline' mode.

    The migration context is configured with only the database URL read
    from the ini file; no Engine is created here, so no DBAPI needs to be
    available.

    Calls to context.execute() emit the given string to the script output.
    """
    context.configure(
        url=config.get_main_option("sqlalchemy.url"),
        target_metadata=target_metadata,
        literal_binds=True,
    )

    with context.begin_transaction():
        context.run_migrations()
|
||||
|
||||
|
||||
def run_migrations_online():
    """Run migrations in 'online' mode.

    An Engine is built from the ``sqlalchemy.``-prefixed options of the
    active ini section, and a connection from it is associated with the
    migration context.
    """
    ini_section = config.get_section(config.config_ini_section)
    engine = engine_from_config(
        ini_section,
        prefix='sqlalchemy.',
        poolclass=pool.NullPool,
    )

    with engine.connect() as connection:
        context.configure(connection=connection,
                          target_metadata=target_metadata)

        with context.begin_transaction():
            context.run_migrations()
|
||||
|
||||
# Run the migration routine that matches the mode reported by the context.
if not context.is_offline_mode():
    run_migrations_online()
else:
    run_migrations_offline()
|
||||
BIN
alembic/env.pyc
Normal file
BIN
alembic/env.pyc
Normal file
Binary file not shown.
24
alembic/script.py.mako
Normal file
24
alembic/script.py.mako
Normal file
@@ -0,0 +1,24 @@
|
||||
"""${message}
|
||||
|
||||
Revision ID: ${up_revision}
|
||||
Revises: ${down_revision | comma,n}
|
||||
Create Date: ${create_date}
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
${imports if imports else ""}
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = ${repr(up_revision)}
|
||||
down_revision = ${repr(down_revision)}
|
||||
branch_labels = ${repr(branch_labels)}
|
||||
depends_on = ${repr(depends_on)}
|
||||
|
||||
|
||||
def upgrade():
|
||||
${upgrades if upgrades else "pass"}
|
||||
|
||||
|
||||
def downgrade():
|
||||
${downgrades if downgrades else "pass"}
|
||||
28
alembic/versions/131d925693a4_added_hnid_column.py
Normal file
28
alembic/versions/131d925693a4_added_hnid_column.py
Normal file
@@ -0,0 +1,28 @@
|
||||
"""added hnid column
|
||||
|
||||
Revision ID: 131d925693a4
|
||||
Revises: d1f648fcd62a
|
||||
Create Date: 2016-12-05 19:12:17.872699
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = '131d925693a4'
|
||||
down_revision = 'd1f648fcd62a'
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade():
    """Add a nullable ``hnid`` string column to the ``found_items`` table."""
    hnid_column = sa.Column('hnid', sa.String(), nullable=True)
    op.add_column('found_items', hnid_column)
|
||||
|
||||
|
||||
def downgrade():
    """Remove the ``hnid`` column from the ``found_items`` table.

    The project's configured database is SQLite, where a plain
    ``ALTER TABLE ... DROP COLUMN`` is not available on every version.
    Batch mode lets Alembic fall back to recreating the table when the
    backend cannot drop the column directly.
    """
    with op.batch_alter_table('found_items') as batch_op:
        batch_op.drop_column('hnid')
|
||||
BIN
alembic/versions/131d925693a4_added_hnid_column.pyc
Normal file
BIN
alembic/versions/131d925693a4_added_hnid_column.pyc
Normal file
Binary file not shown.
34
alembic/versions/d1f648fcd62a_create_initial_state.py
Normal file
34
alembic/versions/d1f648fcd62a_create_initial_state.py
Normal file
@@ -0,0 +1,34 @@
|
||||
"""create initial state
|
||||
|
||||
Revision ID: d1f648fcd62a
|
||||
Revises:
|
||||
Create Date: 2016-12-05 18:37:48.117437
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = 'd1f648fcd62a'
|
||||
down_revision = None
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade():
    """Create the initial ``trained_items`` and ``found_items`` tables."""
    # Rated headlines: the headline text is the key, ``state`` the rating.
    op.create_table(
        'trained_items',
        sa.Column('name', sa.String, primary_key=True),
        sa.Column('state', sa.Boolean),
    )

    # Scraped stories together with the score assigned to each of them.
    op.create_table(
        'found_items',
        sa.Column('id', sa.Integer, primary_key=True),
        sa.Column('name', sa.String),
        sa.Column('url', sa.String, unique=True, index=True),
        sa.Column('date', sa.DateTime, index=True),
        sa.Column('rating', sa.Float),
    )
|
||||
|
||||
|
||||
def downgrade():
    """Drop the two tables created by ``upgrade``."""
    for table_name in ('trained_items', 'found_items'):
        op.drop_table(table_name)
|
||||
BIN
alembic/versions/d1f648fcd62a_create_initial_state.pyc
Normal file
BIN
alembic/versions/d1f648fcd62a_create_initial_state.pyc
Normal file
Binary file not shown.
25
alembic/versions/d1f648fcd62a_create_initial_state.py~
Normal file
25
alembic/versions/d1f648fcd62a_create_initial_state.py~
Normal file
@@ -0,0 +1,25 @@
|
||||
"""create initial state
|
||||
|
||||
Revision ID: d1f648fcd62a
|
||||
Revises:
|
||||
Create Date: 2016-12-05 18:37:48.117437
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = 'd1f648fcd62a'
|
||||
down_revision = None
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade():
    """Placeholder upgrade step; it performs no schema changes."""
    return None
|
||||
|
||||
|
||||
def downgrade():
    """Revert ``upgrade``.

    ``upgrade`` in this revision draft creates nothing, so there is nothing
    to undo. The previous body tried to drop a table with an empty name,
    which is not a valid table and mirrors no step of ``upgrade``.
    """
    pass
|
||||
28
alembic/versions/e07ebf603ff2_added_comment_count_column.py
Normal file
28
alembic/versions/e07ebf603ff2_added_comment_count_column.py
Normal file
@@ -0,0 +1,28 @@
|
||||
"""added comment_count column
|
||||
|
||||
Revision ID: e07ebf603ff2
|
||||
Revises: 131d925693a4
|
||||
Create Date: 2016-12-05 19:49:45.783201
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = 'e07ebf603ff2'
|
||||
down_revision = '131d925693a4'
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade():
    """Add a nullable ``comment_count`` integer column to ``found_items``."""
    comment_count_column = sa.Column('comment_count', sa.Integer(), nullable=True)
    op.add_column('found_items', comment_count_column)
|
||||
|
||||
|
||||
def downgrade():
    """Remove the ``comment_count`` column from the ``found_items`` table.

    The project's configured database is SQLite, where a plain
    ``ALTER TABLE ... DROP COLUMN`` is not available on every version.
    Batch mode lets Alembic fall back to recreating the table when the
    backend cannot drop the column directly.
    """
    with op.batch_alter_table('found_items') as batch_op:
        batch_op.drop_column('comment_count')
|
||||
BIN
alembic/versions/e07ebf603ff2_added_comment_count_column.pyc
Normal file
BIN
alembic/versions/e07ebf603ff2_added_comment_count_column.pyc
Normal file
Binary file not shown.
26
db.py
Normal file
26
db.py
Normal file
@@ -0,0 +1,26 @@
|
||||
from sqlalchemy.ext.declarative import declarative_base
|
||||
from sqlalchemy import Column, Boolean, Integer, String, DateTime, Float, create_engine
|
||||
from sqlalchemy.orm import sessionmaker, scoped_session
|
||||
|
||||
|
||||
Base = declarative_base()
|
||||
|
||||
class TrainedItem(Base):
    """A rated headline, stored as one training sample for the classifier."""

    __tablename__ = 'trained_items'

    # The headline text itself is the primary key, so each headline is
    # stored at most once.
    name = Column(String, primary_key=True)
    # The rating given to the headline (True/False).
    state = Column(Boolean)
|
||||
|
||||
class FoundItem(Base):
    """A scraped story together with its classifier score."""

    __tablename__ = 'found_items'

    # Surrogate key.
    id = Column(Integer, primary_key=True)
    # Identifier of the story on the source site.
    hnid = Column(String)
    # Number of comments recorded for the story.
    comment_count = Column(Integer)
    # Headline text.
    name = Column(String)
    # Link target of the story; unique, and indexed for lookups by URL.
    url = Column(String, unique=True, index=True)
    # Timestamp stored with the row; indexed for date-based filtering.
    date = Column(DateTime, index=True)
    # Score assigned to the headline.
    rating = Column(Float)
|
||||
|
||||
# A single SQLite file in the working directory backs the application.
engine = create_engine('sqlite:///hnlearn.db')

# Session() hands out sessions from a scoped registry built on this factory.
session_factory = sessionmaker(bind=engine)
Session = scoped_session(session_factory)

# Create any tables that do not exist yet, as a side effect of importing
# this module.
# NOTE(review): alembic/env.py imports this module too, so the tables may
# already exist when the initial migration calls create_table -- confirm.
Base.metadata.create_all(engine)
|
||||
26
learn.py
Normal file
26
learn.py
Normal file
@@ -0,0 +1,26 @@
|
||||
from sklearn.feature_extraction.text import CountVectorizer
|
||||
from sklearn.linear_model import SGDClassifier
|
||||
|
||||
class Classifier(object):
    """Text classifier trained on the samples returned by a data grabber.

    ``datagrabber`` is a zero-argument callable returning a pair
    ``(texts, labels)``. It is called again on every ``reload``.
    """

    def __init__(self, datagrabber):
        """Remember the data source and train the first model from it."""
        self.grabber = datagrabber
        self.reload()

    def reload(self):
        """Fetch all samples and retrain the vectorizer and classifier."""
        texts, labels = self.grabber()

        # Bag-of-words features over word 1-grams up to 3-grams.
        vectorizer = CountVectorizer(analyzer='word', ngram_range=(1, 3))
        self.vect = vectorizer
        self.train_vec = vectorizer.fit_transform(texts)

        # NOTE(review): n_iter is the keyword this code was written against;
        # check it against the installed scikit-learn version.
        self.clf = SGDClassifier(
            loss='hinge',
            penalty='l2',
            alpha=1e-3,
            n_iter=500,
            random_state=42,
        )
        self.text_clf = self.clf.fit(self.train_vec, labels)

    def scan(self, name):
        """Return the decision-function score of the single text ``name``."""
        features = self.vect.transform([name])
        scores = self.text_clf.decision_function(features)
        return scores[0]

    def add(self, name, state):
        """Account for a newly rated sample by retraining from scratch.

        ``name`` and ``state`` are not used directly: the sample is expected
        to be available through the data grabber already. An incremental
        partial_fit would require a hashing vectorizer, which would make
        the model non-reversible, so for now everything is reloaded.
        """
        self.reload()
|
||||
64
views/index.tpl
Normal file
64
views/index.tpl
Normal file
@@ -0,0 +1,64 @@
|
||||
<html>
<head>
    <link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/v/bs-3.3.6/jq-2.2.3/dt-1.10.12/datatables.css"/>

    <script type="text/javascript" src="https://cdn.datatables.net/v/bs-3.3.6/jq-2.2.3/dt-1.10.12/datatables.js"></script>
    <script>
        // Turn the table into a DataTable, initially sorted (descending) by
        // the column index the view passes in as "sortby".
        $(document).ready(function()
        {
            $('#table').DataTable({
                "order": [[{{sortby}}, 'desc']],
                "paging": false
            });
        });
    </script>
</head>
<body>
<div class="container">
    <nav class="navbar navbar-default">
        <div class="container-fluid">
            <div class="navbar-header">
                <a class="navbar-brand" href="#">HNLearn</a>
            </div>

            <div class="collapse navbar-collapse">
                <ul class="nav navbar-nav">
                    <li><a href="/">Latest &gt; 0</a></li>
                    <li><a href="/?all=true">Latest with &lt; 0</a></li>
                    <li><a href="/?limit=day&amp;all=true">Top for Past Day</a></li>
                    <li><a href="/?limit=week&amp;all=true">Top for Past Week</a></li>
                </ul>
            </div>
        </div>
    </nav>
    <table id="table" class="table">
        <thead>
            <tr>
                <th>Title</th>
                <th>Date</th>
                <th>Score</th>
                <th>Up/Down</th>
                <th>Comments</th>
            </tr>
        </thead>
        <tbody>
            %for x in items:
            <tr>
                <td>
                    <a href="{{x.url}}">{{x.name}}</a>
                </td>
                <td>{{x.date.strftime("%Y-%m-%d %H:%M")}}</td>
                <td>{{"{0:.4g}".format(x.rating)}}</td>
                <td><a href="/rate/{{x.id}}?rating=good">Up</a> / <a href="/rate/{{x.id}}?rating=bad">Down</a></td>
                <td>
                    <!-- Item pages live at item?id=<hnid>, the same form the scraper matches. -->
                    <a href="https://news.ycombinator.com/item?id={{x.hnid}}">
                        {{x.comment_count}} comments
                    </a>
                </td>
            </tr>
            %end
        </tbody>
    </table>
</div>
</body>
</html>
|
||||
57
views/index.tpl~
Normal file
57
views/index.tpl~
Normal file
@@ -0,0 +1,57 @@
|
||||
<html>
<head>
    <link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/v/bs-3.3.6/jq-2.2.3/dt-1.10.12/datatables.css"/>

    <script type="text/javascript" src="https://cdn.datatables.net/v/bs-3.3.6/jq-2.2.3/dt-1.10.12/datatables.js"></script>
    <script>
        // Turn the table into a DataTable, initially sorted (descending) by
        // the column index the view passes in as "sortby".
        $(document).ready(function()
        {
            $('#table').DataTable({
                "order": [[{{sortby}}, 'desc']],
                "paging": false
            });
        });
    </script>
</head>
<body>
<div class="container">
    <nav class="navbar navbar-default">
        <div class="container-fluid">
            <div class="navbar-header">
                <a class="navbar-brand" href="#">HNLearn</a>
            </div>

            <div class="collapse navbar-collapse">
                <ul class="nav navbar-nav">
                    <li><a href="/">Latest &gt; 0</a></li>
                    <li><a href="/?all=true">Latest with &lt; 0</a></li>
                    <li><a href="/?limit=day&amp;all=true">Top for Past Day</a></li>
                    <li><a href="/?limit=week&amp;all=true">Top for Past Week</a></li>
                </ul>
            </div>
        </div>
    </nav>
    <table id="table" class="table">
        <thead>
            <tr>
                <th>Title</th>
                <th>Date</th>
                <th>Score</th>
                <th>Up/Down</th>
            </tr>
        </thead>
        <tbody>
            %for x in items:
            <tr>
                <td><a href="{{x.url}}">{{x.name}}</a></td>
                <td>{{x.date.strftime("%Y-%m-%d %H:%M")}}</td>
                <td>{{"{0:.4g}".format(x.rating)}}</td>
                <td><a href="/rate/{{x.id}}?rating=good">Up</a> / <a href="/rate/{{x.id}}?rating=bad">Down</a></td>

            </tr>
            %end
        </tbody>
    </table>
</div>
</body>
</html>
|
||||
127
web.py
Normal file
127
web.py
Normal file
@@ -0,0 +1,127 @@
|
||||
from db import *
|
||||
from learn import *
|
||||
import datetime
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
import bottle
|
||||
from threading import Thread
|
||||
import schedule
|
||||
import time
|
||||
|
||||
def grab_all_data():
    """Return every stored training sample as ``(names, states)``.

    Both lists are in query order and have the same length; entry ``i`` of
    ``states`` is the stored rating for entry ``i`` of ``names``.
    """
    rows = Session().query(TrainedItem.name, TrainedItem.state).all()
    names = [name for name, _ in rows]
    states = [state for _, state in rows]
    return (names, states)
|
||||
|
||||
def import_data():
    """Seed the ``trained_items`` table from the files under ``data``.

    Each loaded document becomes one TrainedItem: its UTF-8 decoded content
    is the name, and its target value converted to bool is the state. All
    rows are committed together at the end.
    """
    # Imported here because load_files is not among the names this module
    # brings in at the top; without it the call below raises NameError.
    from sklearn.datasets import load_files

    session = Session()
    trained = load_files('data', shuffle=True)

    for raw_text, target in zip(trained.data, trained.target):
        session.add(TrainedItem(name=raw_text.decode('utf8'),
                                state=bool(target)))

    session.commit()
|
||||
|
||||
# import_data()
|
||||
clf = Classifier(grab_all_data)
|
||||
|
||||
def _parse_comment_count(label):
    """Return the leading integer of a comment-link label, or 0 if none.

    Non-breaking spaces are treated as ordinary separators, so the number is
    found whatever word follows it. A label without a leading number is
    printed for inspection and counted as 0.
    """
    tokens = label.replace(u"\xa0", u" ").split()
    try:
        return int(tokens[0])
    except (IndexError, ValueError):
        print(repr(label))
        return 0


@bottle.route("/update")
def updateHN():
    """Scrape the Hacker News front page and store or refresh its stories.

    Stories whose URL is not yet known are inserted with a rating from the
    classifier; known ones only get their ``hnid`` and ``comment_count``
    refreshed. This runs both as the ``/update`` route and as the scheduled
    job, so a failed download is reported and skipped instead of raised:
    an exception here would otherwise end the scheduler thread.
    """
    print("Updating HN...")
    try:
        # The timeout keeps a stalled connection from blocking the caller
        # indefinitely.
        resp = requests.get("https://news.ycombinator.com/", timeout=30)
    except requests.RequestException as err:
        print("HN update failed: %r" % (err,))
        return

    soup = BeautifulSoup(resp.text, "lxml")
    session = Session()
    for t in soup.find_all('td', align=None, class_='title'):
        link = t.find('a', class_="storylink")
        if link is None:
            # Title cells without a story link carry no story.
            continue
        url = link["href"]

        parent_tr = t.parent
        hnid = parent_tr.get("id")
        if hnid is None:
            # Without an id neither the comment link nor the item can be
            # matched, so the row is skipped.
            continue

        # The row following the title row is searched for links to the
        # item page; the last such link carries the comment count label.
        subtext_row = parent_tr.next_sibling
        comment_links = []
        if subtext_row is not None:
            comment_links = subtext_row.find_all('a', href="item?id=" + hnid)
        comment_count = 0
        if comment_links:
            comment_count = _parse_comment_count(unicode(comment_links[-1].text))

        check = session.query(FoundItem).filter(FoundItem.url == url).one_or_none()
        if check is None:
            print(url)
            item = FoundItem(name=t.text,
                             hnid=hnid,
                             comment_count=comment_count,
                             url=url,
                             date=datetime.datetime.now(),
                             rating=clf.scan(unicode(t.text)))
            session.add(item)
        else:
            check.hnid = hnid
            check.comment_count = comment_count
    session.commit()
|
||||
|
||||
class SchedThread(Thread):
    """Background thread that keeps running the pending ``schedule`` jobs."""

    def __init__(self):
        """Initialise the thread with the default Thread settings."""
        super(SchedThread, self).__init__()

    def run(self):
        """Run pending jobs, pausing one second between checks, forever."""
        while True:
            schedule.run_pending()
            time.sleep(1)
|
||||
|
||||
@bottle.route('/')
@bottle.view('index.tpl')
def news():
    """Load the stories to display and hand them to the index template.

    Query parameters:
      all=true    also include items whose rating is not above 0
      limit=week  restrict to items dated within the past 7 days
      limit=day   restrict to items dated within the past day

    Without a recognised ``limit`` the 100 newest items are returned and
    ``sortby`` is 1; with one, ``sortby`` is 2.
    """
    params = bottle.request.params
    query = Session().query(FoundItem)

    if params.get("all") != "true":
        query = query.filter(FoundItem.rating > 0)

    # Length of the time window in days for each recognised "limit" value.
    window_days = {"week": 7, "day": 1}.get(params.get("limit"))
    if window_days is None:
        sort_column = 1
        query = query.order_by(FoundItem.date.desc()).limit(100)
    else:
        sort_column = 2
        cutoff = datetime.datetime.now() - datetime.timedelta(days=window_days)
        query = query.filter(FoundItem.date > cutoff)

    return {"items": query, "sortby": sort_column}
|
||||
|
||||
@bottle.route('/rate/<id:int>')
def rate(id):
    """Record a rating for the found item ``id`` and re-score all items.

    The item's name is stored as a training sample, the classifier is
    updated, every found item is rated again, and the client is redirected
    to the index page. An unknown ``id`` is answered with 404.
    """
    session = Session()
    # Only the literal value "good" counts as a positive rating; anything
    # else, including a missing parameter, is stored as negative.
    rating = bottle.request.params.get('rating') == "good"

    item = session.query(FoundItem).filter(FoundItem.id == id).one_or_none()
    if item is None:
        bottle.abort(404, "No such item.")

    # merge() inserts the sample, or updates the one stored under this name.
    session.merge(TrainedItem(name=item.name, state=rating))
    session.commit()
    clf.add(item.name, rating)

    # Re-rate all items in the DB with the updated classifier.
    for found in session.query(FoundItem):
        found.rating = clf.scan(found.name)
    session.commit()
    bottle.redirect("/")
|
||||
|
||||
if __name__ == "__main__":
    # Run updateHN every ten minutes on a background thread.
    schedule.every(10).minutes.do(updateHN)
    scheduler_thread = SchedThread()
    # NOTE(review): daemon mode is disabled here, so this thread keeps
    # running when the web server stops -- confirm that is intended.
    # scheduler_thread.daemon = True
    scheduler_thread.start()

    # Serve on all interfaces, port 55512.
    bottle.run(host="0.0.0.0", port=55512)
|
||||
Reference in New Issue
Block a user