initial import
This commit is contained in:
68
alembic.ini
Normal file
68
alembic.ini
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
# A generic, single database configuration.
|
||||||
|
|
||||||
|
[alembic]
|
||||||
|
# path to migration scripts
|
||||||
|
script_location = alembic
|
||||||
|
|
||||||
|
# template used to generate migration files
|
||||||
|
# file_template = %%(rev)s_%%(slug)s
|
||||||
|
|
||||||
|
# max length of characters to apply to the
|
||||||
|
# "slug" field
|
||||||
|
#truncate_slug_length = 40
|
||||||
|
|
||||||
|
# set to 'true' to run the environment during
|
||||||
|
# the 'revision' command, regardless of autogenerate
|
||||||
|
# revision_environment = false
|
||||||
|
|
||||||
|
# set to 'true' to allow .pyc and .pyo files without
|
||||||
|
# a source .py file to be detected as revisions in the
|
||||||
|
# versions/ directory
|
||||||
|
# sourceless = false
|
||||||
|
|
||||||
|
# version location specification; this defaults
|
||||||
|
# to alembic/versions. When using multiple version
|
||||||
|
# directories, initial revisions must be specified with --version-path
|
||||||
|
# version_locations = %(here)s/bar %(here)s/bat alembic/versions
|
||||||
|
|
||||||
|
# the output encoding used when revision files
|
||||||
|
# are written from script.py.mako
|
||||||
|
# output_encoding = utf-8
|
||||||
|
|
||||||
|
sqlalchemy.url = sqlite:///hnlearn.db
|
||||||
|
|
||||||
|
|
||||||
|
# Logging configuration
|
||||||
|
[loggers]
|
||||||
|
keys = root,sqlalchemy,alembic
|
||||||
|
|
||||||
|
[handlers]
|
||||||
|
keys = console
|
||||||
|
|
||||||
|
[formatters]
|
||||||
|
keys = generic
|
||||||
|
|
||||||
|
[logger_root]
|
||||||
|
level = WARN
|
||||||
|
handlers = console
|
||||||
|
qualname =
|
||||||
|
|
||||||
|
[logger_sqlalchemy]
|
||||||
|
level = WARN
|
||||||
|
handlers =
|
||||||
|
qualname = sqlalchemy.engine
|
||||||
|
|
||||||
|
[logger_alembic]
|
||||||
|
level = INFO
|
||||||
|
handlers =
|
||||||
|
qualname = alembic
|
||||||
|
|
||||||
|
[handler_console]
|
||||||
|
class = StreamHandler
|
||||||
|
args = (sys.stderr,)
|
||||||
|
level = NOTSET
|
||||||
|
formatter = generic
|
||||||
|
|
||||||
|
[formatter_generic]
|
||||||
|
format = %(levelname)-5.5s [%(name)s] %(message)s
|
||||||
|
datefmt = %H:%M:%S
|
||||||
1
alembic/README
Normal file
1
alembic/README
Normal file
@@ -0,0 +1 @@
|
|||||||
|
Generic single-database configuration.
|
||||||
73
alembic/env.py
Normal file
73
alembic/env.py
Normal file
@@ -0,0 +1,73 @@
|
|||||||
|
from __future__ import with_statement
|
||||||
|
from alembic import context
|
||||||
|
from sqlalchemy import engine_from_config, pool
|
||||||
|
from logging.config import fileConfig
|
||||||
|
|
||||||
|
# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
config = context.config

# Interpret the config file for Python logging.
# This line sets up loggers basically.
fileConfig(config.config_file_name)

# add your model's MetaData object here
# for 'autogenerate' support
# from myapp import mymodel
# target_metadata = mymodel.Base.metadata
import sys
# Put "." on the import path so the project's ``db`` module can be imported.
# NOTE(review): this resolves relative to the current working directory, so
# presumably alembic must be invoked from the project root -- confirm.
sys.path.append(".")
# NOTE(review): importing ``db`` runs its module-level code, which creates an
# engine and calls ``Base.metadata.create_all(engine)``; verify that this is
# wanted while migrations are being run.
from db import Base
# Metadata that 'autogenerate' compares the database against.
target_metadata = Base.metadata
|
||||||
|
|
||||||
|
# other values from the config, defined by the needs of env.py,
|
||||||
|
# can be acquired:
|
||||||
|
# my_important_option = config.get_main_option("my_important_option")
|
||||||
|
# ... etc.
|
||||||
|
|
||||||
|
|
||||||
|
def run_migrations_offline():
    """Run migrations in 'offline' mode.

    Only the ``sqlalchemy.url`` option from the config is handed to the
    migration context; no Engine is created, so no DBAPI needs to be
    available.  Calls to context.execute() here emit the given string to
    the script output.
    """
    offline_options = {
        "url": config.get_main_option("sqlalchemy.url"),
        "target_metadata": target_metadata,
        "literal_binds": True,
    }
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()
|
||||||
|
|
||||||
|
|
||||||
|
def run_migrations_online():
    """Run migrations in 'online' mode.

    Builds an Engine from the ``sqlalchemy.``-prefixed options of the
    active ini section (using ``NullPool``), opens a connection, binds it
    to the migration context and runs the migrations in a transaction.
    """
    ini_section = config.get_section(config.config_ini_section)
    engine = engine_from_config(
        ini_section,
        prefix='sqlalchemy.',
        poolclass=pool.NullPool,
    )

    with engine.connect() as connection:
        context.configure(connection=connection, target_metadata=target_metadata)
        with context.begin_transaction():
            context.run_migrations()
|
||||||
|
|
||||||
|
# Executed when this module is run: pick the migration runner that matches
# the mode reported by the Alembic context.
if context.is_offline_mode():
    run_migrations_offline()
else:
    run_migrations_online()
|
||||||
BIN
alembic/env.pyc
Normal file
BIN
alembic/env.pyc
Normal file
Binary file not shown.
24
alembic/script.py.mako
Normal file
24
alembic/script.py.mako
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
"""${message}
|
||||||
|
|
||||||
|
Revision ID: ${up_revision}
|
||||||
|
Revises: ${down_revision | comma,n}
|
||||||
|
Create Date: ${create_date}
|
||||||
|
|
||||||
|
"""
|
||||||
|
from alembic import op
|
||||||
|
import sqlalchemy as sa
|
||||||
|
${imports if imports else ""}
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision = ${repr(up_revision)}
|
||||||
|
down_revision = ${repr(down_revision)}
|
||||||
|
branch_labels = ${repr(branch_labels)}
|
||||||
|
depends_on = ${repr(depends_on)}
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade():
|
||||||
|
${upgrades if upgrades else "pass"}
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade():
|
||||||
|
${downgrades if downgrades else "pass"}
|
||||||
28
alembic/versions/131d925693a4_added_hnid_column.py
Normal file
28
alembic/versions/131d925693a4_added_hnid_column.py
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
"""added hnid column
|
||||||
|
|
||||||
|
Revision ID: 131d925693a4
|
||||||
|
Revises: d1f648fcd62a
|
||||||
|
Create Date: 2016-12-05 19:12:17.872699
|
||||||
|
|
||||||
|
"""
|
||||||
|
from alembic import op
|
||||||
|
import sqlalchemy as sa
|
||||||
|
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision = '131d925693a4'
|
||||||
|
down_revision = 'd1f648fcd62a'
|
||||||
|
branch_labels = None
|
||||||
|
depends_on = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade():
    """Add the nullable string column ``hnid`` to ``found_items``."""
    hnid_column = sa.Column('hnid', sa.String(), nullable=True)
    op.add_column('found_items', hnid_column)
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade():
    """Drop the ``hnid`` column from ``found_items``, reversing ``upgrade``."""
    table, column = 'found_items', 'hnid'
    op.drop_column(table, column)
|
||||||
BIN
alembic/versions/131d925693a4_added_hnid_column.pyc
Normal file
BIN
alembic/versions/131d925693a4_added_hnid_column.pyc
Normal file
Binary file not shown.
34
alembic/versions/d1f648fcd62a_create_initial_state.py
Normal file
34
alembic/versions/d1f648fcd62a_create_initial_state.py
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
"""create initial state
|
||||||
|
|
||||||
|
Revision ID: d1f648fcd62a
|
||||||
|
Revises:
|
||||||
|
Create Date: 2016-12-05 18:37:48.117437
|
||||||
|
|
||||||
|
"""
|
||||||
|
from alembic import op
|
||||||
|
import sqlalchemy as sa
|
||||||
|
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision = 'd1f648fcd62a'
|
||||||
|
down_revision = None
|
||||||
|
branch_labels = None
|
||||||
|
depends_on = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade():
    """Create the initial schema: the ``trained_items`` and ``found_items`` tables."""
    # Labelled training examples, keyed by the title text itself.
    op.create_table(
        'trained_items',
        sa.Column('name', sa.String, primary_key=True),
        sa.Column('state', sa.Boolean),
    )
    # Scraped stories; ``url`` is unique and both ``url`` and ``date`` are indexed.
    op.create_table(
        'found_items',
        sa.Column('id', sa.Integer, primary_key=True),
        sa.Column('name', sa.String),
        sa.Column('url', sa.String, unique=True, index=True),
        sa.Column('date', sa.DateTime, index=True),
        sa.Column('rating', sa.Float),
    )
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade():
    """Drop both tables created by ``upgrade``."""
    for table_name in ('trained_items', 'found_items'):
        op.drop_table(table_name)
|
||||||
BIN
alembic/versions/d1f648fcd62a_create_initial_state.pyc
Normal file
BIN
alembic/versions/d1f648fcd62a_create_initial_state.pyc
Normal file
Binary file not shown.
25
alembic/versions/d1f648fcd62a_create_initial_state.py~
Normal file
25
alembic/versions/d1f648fcd62a_create_initial_state.py~
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
"""create initial state
|
||||||
|
|
||||||
|
Revision ID: d1f648fcd62a
|
||||||
|
Revises:
|
||||||
|
Create Date: 2016-12-05 18:37:48.117437
|
||||||
|
|
||||||
|
"""
|
||||||
|
from alembic import op
|
||||||
|
import sqlalchemy as sa
|
||||||
|
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision = 'd1f648fcd62a'
|
||||||
|
down_revision = None
|
||||||
|
branch_labels = None
|
||||||
|
depends_on = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade():
    """Do nothing; this copy of the revision creates no schema objects."""
    # NOTE(review): the file name ends in ``.py~``, which looks like an editor
    # backup of the real d1f648fcd62a migration -- consider removing it from
    # version control.
    pass
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade():
    """Do nothing, mirroring the no-op ``upgrade`` in this file.

    The previous body called ``op.drop_table('')``, i.e. it tried to drop a
    table with an empty name even though ``upgrade`` here creates no tables.
    """
    # NOTE(review): the file name ends in ``.py~``, which looks like an editor
    # backup of the real d1f648fcd62a migration -- consider removing it from
    # version control.
    pass
|
||||||
28
alembic/versions/e07ebf603ff2_added_comment_count_column.py
Normal file
28
alembic/versions/e07ebf603ff2_added_comment_count_column.py
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
"""added comment_count column
|
||||||
|
|
||||||
|
Revision ID: e07ebf603ff2
|
||||||
|
Revises: 131d925693a4
|
||||||
|
Create Date: 2016-12-05 19:49:45.783201
|
||||||
|
|
||||||
|
"""
|
||||||
|
from alembic import op
|
||||||
|
import sqlalchemy as sa
|
||||||
|
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision = 'e07ebf603ff2'
|
||||||
|
down_revision = '131d925693a4'
|
||||||
|
branch_labels = None
|
||||||
|
depends_on = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade():
    """Add the nullable integer column ``comment_count`` to ``found_items``."""
    comment_count_column = sa.Column('comment_count', sa.Integer(), nullable=True)
    op.add_column('found_items', comment_count_column)
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade():
    """Drop the ``comment_count`` column from ``found_items``, reversing ``upgrade``."""
    table, column = 'found_items', 'comment_count'
    op.drop_column(table, column)
|
||||||
BIN
alembic/versions/e07ebf603ff2_added_comment_count_column.pyc
Normal file
BIN
alembic/versions/e07ebf603ff2_added_comment_count_column.pyc
Normal file
Binary file not shown.
26
db.py
Normal file
26
db.py
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
from sqlalchemy.ext.declarative import declarative_base
|
||||||
|
from sqlalchemy import Column, Boolean, Integer, String, DateTime, Float, create_engine
|
||||||
|
from sqlalchemy.orm import sessionmaker, scoped_session
|
||||||
|
|
||||||
|
|
||||||
|
Base = declarative_base()
|
||||||
|
|
||||||
|
class TrainedItem(Base):
    """One labelled training example (a title and its good/bad label)."""
    __tablename__ = 'trained_items'
    # Title text; it is the primary key, so each title has at most one label.
    name = Column(String, primary_key=True)
    # Label: web.py stores True when an item was rated "good", False otherwise.
    state = Column(Boolean)
|
||||||
|
|
||||||
|
class FoundItem(Base):
    """A story scraped from the Hacker News front page, with its classifier score."""
    __tablename__ = 'found_items'
    # Surrogate primary key; used in the /rate/<id> route.
    id = Column(Integer, primary_key=True)
    # Hacker News item id, taken from the story row's ``id`` attribute.
    hnid = Column(String)
    # Comment count parsed from the story's comments link (0 when not parseable).
    comment_count = Column(Integer)
    # Text of the story's title cell.
    name = Column(String)
    # Story link; unique, and used to detect items that were already stored.
    url = Column(String, unique=True, index=True)
    # Local time at which the item was first stored.
    date = Column(DateTime, index=True)
    # Classifier decision value for ``name``; the default view shows only values > 0.
    rating = Column(Float)
|
||||||
|
|
||||||
|
# Same database URL as ``sqlalchemy.url`` in alembic.ini.
engine = create_engine('sqlite:///hnlearn.db')
# NOTE(review): this runs on every import of the module, including the
# ``from db import Base`` in alembic/env.py, so on a fresh database the tables
# may already exist before the initial migration's ``create_table`` calls run
# -- confirm that this interplay is intended.
Base.metadata.create_all(engine)
session_factory = sessionmaker(bind=engine)
# ``Session()`` is what the rest of the project calls to obtain a session.
Session = scoped_session(session_factory)
|
||||||
26
learn.py
Normal file
26
learn.py
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
from sklearn.feature_extraction.text import CountVectorizer
|
||||||
|
from sklearn.linear_model import SGDClassifier
|
||||||
|
|
||||||
|
class Classifier(object):
    """Word n-gram text classifier that is rebuilt from scratch whenever asked.

    ``datagrabber`` is a zero-argument callable returning a pair
    ``(texts, labels)``; it is invoked on construction and on every reload.
    """

    def __init__(self, datagrabber):
        self.grabber = datagrabber
        self.reload()

    def reload(self):
        """Fetch the full training set and fit a fresh vectorizer and model."""
        texts, labels = self.grabber()

        # Word 1- to 3-gram counts.
        vectorizer = CountVectorizer(analyzer='word', ngram_range=(1, 3))
        self.vect = vectorizer
        self.train_vec = vectorizer.fit_transform(texts)

        # Fixed random_state so the fitted model is reproducible for the same data.
        model = SGDClassifier(loss='hinge', penalty='l2', alpha=1e-3, n_iter=500, random_state=42)
        self.clf = model
        self.text_clf = model.fit(self.train_vec, labels)

    def scan(self, name):
        """Return the model's decision value for the single text ``name``."""
        features = self.vect.transform([name])
        scores = self.text_clf.decision_function(features)
        return scores[0]

    def add(self, name, state):
        """Account for a newly labelled example by retraining on everything.

        ``name`` and ``state`` are not used directly; the example is expected
        to be available through the data grabber already.
        """
        # implement add using partial_fit
        # this would mean switching to hashing vectorizer, which means we can't reverse the model
        # so for now we're just going to reload completely
        self.reload()
|
||||||
64
views/index.tpl
Normal file
64
views/index.tpl
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/v/bs-3.3.6/jq-2.2.3/dt-1.10.12/datatables.css"/>
|
||||||
|
|
||||||
|
<script type="text/javascript" src="https://cdn.datatables.net/v/bs-3.3.6/jq-2.2.3/dt-1.10.12/datatables.js"></script>
|
||||||
|
<script>
|
||||||
|
$(document).ready(function()
|
||||||
|
{
|
||||||
|
$('#table').DataTable({
|
||||||
|
"order": [[{{sortby}}, 'desc']],
|
||||||
|
"paging": false
|
||||||
|
});
|
||||||
|
});
|
||||||
|
</script>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<div class="container">
|
||||||
|
<nav class="navbar navbar-default">
|
||||||
|
<div class="container-fluid">
|
||||||
|
<div class="navbar-header">
|
||||||
|
<a class="navbar-brand" href="#">HNLearn</a>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="collapse navbar-collapse">
|
||||||
|
<ul class="nav navbar-nav">
|
||||||
|
<li><a href="/">Latest > 0</a></li>
|
||||||
|
<li><a href="/?all=true">Latest with &lt; 0</a></li>
|
||||||
|
<li><a href="/?limit=day&all=true">Top for Past Day</a></li>
|
||||||
|
<li><a href="/?limit=week&all=true">Top for Past Week</a></li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</nav>
|
||||||
|
<table id="table" class="table">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Title</th>
|
||||||
|
<th>Date</th>
|
||||||
|
<th>Score</th>
|
||||||
|
<th>Up/Down</th>
|
||||||
|
<th>Comments</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
%for x in items:
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
<a href="{{x.url}}">{{x.name}}</a>
|
||||||
|
</td>
|
||||||
|
<td>{{x.date.strftime("%Y-%m-%d %H:%M")}}</td>
|
||||||
|
<td>{{"{0:.4g}".format(x.rating)}}</td>
|
||||||
|
<td><a href="/rate/{{x.id}}?rating=good">Up</a> / <a href="/rate/{{x.id}}?rating=bad">Down</a></td>
|
||||||
|
<td>
|
||||||
|
<a href="https://news.ycombinator.com/item?id={{x.hnid}}">
|
||||||
|
{{x.comment_count}} comments
|
||||||
|
</a>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
%end
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
57
views/index.tpl~
Normal file
57
views/index.tpl~
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/v/bs-3.3.6/jq-2.2.3/dt-1.10.12/datatables.css"/>
|
||||||
|
|
||||||
|
<script type="text/javascript" src="https://cdn.datatables.net/v/bs-3.3.6/jq-2.2.3/dt-1.10.12/datatables.js"></script>
|
||||||
|
<script>
|
||||||
|
$(document).ready(function()
|
||||||
|
{
|
||||||
|
$('#table').DataTable({
|
||||||
|
"order": [[{{sortby}}, 'desc']],
|
||||||
|
"paging": false
|
||||||
|
});
|
||||||
|
});
|
||||||
|
</script>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<div class="container">
|
||||||
|
<nav class="navbar navbar-default">
|
||||||
|
<div class="container-fluid">
|
||||||
|
<div class="navbar-header">
|
||||||
|
<a class="navbar-brand" href="#">HNLearn</a>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="collapse navbar-collapse">
|
||||||
|
<ul class="nav navbar-nav">
|
||||||
|
<li><a href="/">Latest > 0</a></li>
|
||||||
|
<li><a href="/?all=true">Latest with < 0</a></li>
|
||||||
|
<li><a href="/?limit=day&all=true">Top for Past Day</a></li>
|
||||||
|
<li><a href="/?limit=week&all=true">Top for Past Week</a></li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</nav>
|
||||||
|
<table id="table" class="table">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Title</th>
|
||||||
|
<th>Date</th>
|
||||||
|
<th>Score</th>
|
||||||
|
<th>Up/Down</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
%for x in items:
|
||||||
|
<tr>
|
||||||
|
<td><a href="{{x.url}}">{{x.name}}</a></td>
|
||||||
|
<td>{{x.date.strftime("%Y-%m-%d %H:%M")}}</td>
|
||||||
|
<td>{{"{0:.4g}".format(x.rating)}}</td>
|
||||||
|
<td><a href="/rate/{{x.id}}?rating=good">Up</a> / <a href="/rate/{{x.id}}?rating=bad">Down</a></td>
|
||||||
|
|
||||||
|
</tr>
|
||||||
|
%end
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
127
web.py
Normal file
127
web.py
Normal file
@@ -0,0 +1,127 @@
|
|||||||
|
from db import *
|
||||||
|
from learn import *
|
||||||
|
import datetime
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
import bottle
|
||||||
|
from threading import Thread
|
||||||
|
import schedule
|
||||||
|
import time
|
||||||
|
|
||||||
|
def grab_all_data():
    """Return all stored training examples as ``(names, states)``.

    Both elements are lists in the same row order: the title texts and
    their labels from the ``trained_items`` table.
    """
    rows = list(Session().query(TrainedItem.name, TrainedItem.state))
    names = [row_name for row_name, _ in rows]
    states = [row_state for _, row_state in rows]
    return (names, states)
|
||||||
|
|
||||||
|
def import_data():
    """Load the labelled files under ``data`` into the ``trained_items`` table.

    Each loaded document is decoded as UTF-8 and stored as the item name;
    its numeric target is stored as a boolean state.  All rows are
    committed in a single transaction.
    """
    # ``load_files`` is not provided by ``from db import *`` or
    # ``from learn import *``, so import it here; otherwise calling this
    # function fails with NameError.
    from sklearn.datasets import load_files

    session = Session()
    trained = load_files('data', shuffle=True)

    for raw_text, target in zip(trained.data, trained.target):
        session.add(TrainedItem(name=raw_text.decode('utf8'), state=bool(target)))

    session.commit()
|
||||||
|
|
||||||
|
# Call to ``import_data`` left disabled.
# NOTE(review): presumably a one-off seeding step for the training table -- confirm.
# import_data()
# Shared classifier instance; constructing it immediately trains on whatever
# ``grab_all_data`` returns (``Classifier.__init__`` calls ``reload``).
clf = Classifier(grab_all_data)
|
||||||
|
|
||||||
|
@bottle.route("/update")
def updateHN():
    """Scrape the Hacker News front page and upsert its stories.

    For every title cell that contains a ``storylink`` anchor, the story
    URL, the row's ``id`` attribute (stored as ``hnid``) and the comment
    count are extracted.  Unknown URLs are inserted as new ``FoundItem``
    rows rated by the classifier; known URLs only get ``hnid`` and
    ``comment_count`` refreshed.  Everything is committed once at the end.

    If the page cannot be fetched, the failure is printed and the update
    is skipped.
    """
    print("Updating HN...")
    session = Session()
    try:
        # Without a timeout a stalled connection would block the caller
        # (e.g. the scheduler thread) indefinitely.
        resp = requests.get("https://news.ycombinator.com/", timeout=30)
    except requests.RequestException as exc:
        # Best effort: an uncaught error here would end the scheduler
        # thread's loop, so report it and wait for the next run.
        print("HN update failed: %r" % (exc,))
        return
    soup = BeautifulSoup(resp.text, "lxml")
    for t in soup.find_all('td', align=None, class_='title'):
        link = t.find('a', class_="storylink")
        if link is None:
            # Title cell without a story link; nothing to store.
            continue
        url = link["href"]
        parent_tr = t.parent
        hnid = parent_tr["id"]

        # The row after the title row carries the links to the item page;
        # the last of them holds the comment count text.
        item_links = parent_tr.next_sibling.find_all('a', href="item?id=" + hnid)
        comment_count = 0
        if item_links:
            # Keep only what precedes the first non-breaking space, so both
            # "12\xa0comments" and "1\xa0comment" parse.
            comment_count_text = item_links[-1].text.split(u"\xa0", 1)[0]
            try:
                comment_count = int(comment_count_text)
            except ValueError:
                # Link text was not a number; keep the default of 0.
                print(repr(comment_count_text))

        check = session.query(FoundItem).filter(FoundItem.url == url).one_or_none()
        if check is None:
            item = FoundItem(name=t.text,
                             hnid=hnid,
                             comment_count=comment_count,
                             url=url,
                             date=datetime.datetime.now(),
                             rating=clf.scan(t.text))
            session.add(item)
        else:
            check.hnid = hnid
            check.comment_count = comment_count
    session.commit()
|
||||||
|
|
||||||
|
class SchedThread(Thread):
    """Background thread that drives the ``schedule`` job runner."""

    def __init__(self):
        super(SchedThread, self).__init__()

    def run(self):
        """Run pending scheduled jobs, then sleep one second; repeat forever."""
        poll_interval = 1
        while True:
            schedule.run_pending()
            time.sleep(poll_interval)
|
||||||
|
|
||||||
|
@bottle.route('/')
@bottle.view('index.tpl')
def news():
    """Build the template context for the story listing.

    Request parameters:
      * ``all=true`` -- also include items whose rating is not above 0.
      * ``limit=week`` / ``limit=day`` -- only items stored within the last
        7 days / 1 day; the table is initially sorted on column index 2.
      * otherwise -- the 100 most recently stored items, initially sorted
        on column index 1.

    Returns a dict with ``items`` (the query) and ``sortby`` (column index).
    """
    params = bottle.request.params
    windows = {
        "week": datetime.timedelta(days=7),
        "day": datetime.timedelta(days=1),
    }

    query = Session().query(FoundItem)
    if params.get("all") != "true":
        query = query.filter(FoundItem.rating > 0)

    window = windows.get(params.get("limit"))
    if window is not None:
        cutoff = datetime.datetime.now() - window
        query = query.filter(FoundItem.date > cutoff)
        sort_column = 2
    else:
        query = query.order_by(FoundItem.date.desc()).limit(100)
        sort_column = 1

    return dict(items=query, sortby=sort_column)
|
||||||
|
|
||||||
|
@bottle.route('/rate/<id:int>')
def rate(id):
    """Record a good/bad rating for a found item, retrain, and re-score everything.

    ``id`` is the ``FoundItem`` primary key taken from the URL; the
    parameter name must match the ``<id:int>`` route wildcard.  The request
    parameter ``rating`` counts as positive only when it equals ``"good"``.

    Responds with 404 when no item has the given id; otherwise finishes by
    redirecting to ``/``.
    """
    session = Session()
    rating = bottle.request.params.get('rating') == "good"
    item = session.query(FoundItem).filter(FoundItem.id == id).one_or_none()
    if item is None:
        # An unknown id is a client error, not a server failure.
        bottle.abort(404, "No such item.")
    # insert or update
    session.merge(TrainedItem(name=item.name, state=rating))
    session.commit()
    clf.add(item.name, rating)
    # re-rate all items in DB
    for found in session.query(FoundItem):
        found.rating = clf.scan(found.name)
    session.commit()
    bottle.redirect("/")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Refresh from Hacker News every 10 minutes; the job is executed by the
    # polling thread started below.
    schedule.every(10).minutes.do(updateHN)
    st = SchedThread()
    # NOTE(review): with the daemon flag left disabled, the polling thread
    # (an endless loop) is a regular thread, so presumably the process will
    # not exit on its own after ``bottle.run`` returns -- confirm intent.
    #st.daemon = True
    st.start()
    # Serve on all interfaces, port 55512.
    bottle.run(host="0.0.0.0",port=55512)
|
||||||
Reference in New Issue
Block a user