From 66cc8b6ad50f6044407d8eb26564e4e02a35257a Mon Sep 17 00:00:00 2001 From: ultra Date: Mon, 5 Dec 2016 21:22:44 -0500 Subject: [PATCH] initial import --- alembic.ini | 68 ++++++++++ alembic/README | 1 + alembic/env.py | 73 ++++++++++ alembic/env.pyc | Bin 0 -> 1904 bytes alembic/script.py.mako | 24 ++++ .../131d925693a4_added_hnid_column.py | 28 ++++ .../131d925693a4_added_hnid_column.pyc | Bin 0 -> 1030 bytes .../d1f648fcd62a_create_initial_state.py | 34 +++++ .../d1f648fcd62a_create_initial_state.pyc | Bin 0 -> 1320 bytes .../d1f648fcd62a_create_initial_state.py~ | 25 ++++ ...e07ebf603ff2_added_comment_count_column.py | 28 ++++ ...07ebf603ff2_added_comment_count_column.pyc | Bin 0 -> 1076 bytes db.py | 26 ++++ learn.py | 26 ++++ views/index.tpl | 64 +++++++++ views/index.tpl~ | 57 ++++++++ web.py | 127 ++++++++++++++++++ 17 files changed, 581 insertions(+) create mode 100644 alembic.ini create mode 100644 alembic/README create mode 100644 alembic/env.py create mode 100644 alembic/env.pyc create mode 100644 alembic/script.py.mako create mode 100644 alembic/versions/131d925693a4_added_hnid_column.py create mode 100644 alembic/versions/131d925693a4_added_hnid_column.pyc create mode 100644 alembic/versions/d1f648fcd62a_create_initial_state.py create mode 100644 alembic/versions/d1f648fcd62a_create_initial_state.pyc create mode 100644 alembic/versions/d1f648fcd62a_create_initial_state.py~ create mode 100644 alembic/versions/e07ebf603ff2_added_comment_count_column.py create mode 100644 alembic/versions/e07ebf603ff2_added_comment_count_column.pyc create mode 100644 db.py create mode 100644 learn.py create mode 100644 views/index.tpl create mode 100644 views/index.tpl~ create mode 100644 web.py diff --git a/alembic.ini b/alembic.ini new file mode 100644 index 0000000..d6fac53 --- /dev/null +++ b/alembic.ini @@ -0,0 +1,68 @@ +# A generic, single database configuration. + +[alembic] +# path to migration scripts +script_location = alembic + +# template used to generate migration files +# file_template = %%(rev)s_%%(slug)s + +# max length of characters to apply to the +# "slug" field +#truncate_slug_length = 40 + +# set to 'true' to run the environment during +# the 'revision' command, regardless of autogenerate +# revision_environment = false + +# set to 'true' to allow .pyc and .pyo files without +# a source .py file to be detected as revisions in the +# versions/ directory +# sourceless = false + +# version location specification; this defaults +# to alembic/versions. When using multiple version +# directories, initial revisions must be specified with --version-path +# version_locations = %(here)s/bar %(here)s/bat alembic/versions + +# the output encoding used when revision files +# are written from script.py.mako +# output_encoding = utf-8 + +sqlalchemy.url = sqlite:///hnlearn.db + + +# Logging configuration +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARN +handlers = console +qualname = + +[logger_sqlalchemy] +level = WARN +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S diff --git a/alembic/README b/alembic/README new file mode 100644 index 0000000..98e4f9c --- /dev/null +++ b/alembic/README @@ -0,0 +1 @@ +Generic single-database configuration. \ No newline at end of file diff --git a/alembic/env.py b/alembic/env.py new file mode 100644 index 0000000..9e5a106 --- /dev/null +++ b/alembic/env.py @@ -0,0 +1,73 @@ +from __future__ import with_statement +from alembic import context +from sqlalchemy import engine_from_config, pool +from logging.config import fileConfig + +# this is the Alembic Config object, which provides +# access to the values within the .ini file in use. +config = context.config + +# Interpret the config file for Python logging. +# This line sets up loggers basically. +fileConfig(config.config_file_name) + +# add your model's MetaData object here +# for 'autogenerate' support +# from myapp import mymodel +# target_metadata = mymodel.Base.metadata +import sys +sys.path.append(".") +from db import Base +target_metadata = Base.metadata + +# other values from the config, defined by the needs of env.py, +# can be acquired: +# my_important_option = config.get_main_option("my_important_option") +# ... etc. + + +def run_migrations_offline(): + """Run migrations in 'offline' mode. + + This configures the context with just a URL + and not an Engine, though an Engine is acceptable + here as well. By skipping the Engine creation + we don't even need a DBAPI to be available. + + Calls to context.execute() here emit the given string to the + script output. + + """ + url = config.get_main_option("sqlalchemy.url") + context.configure( + url=url, target_metadata=target_metadata, literal_binds=True) + + with context.begin_transaction(): + context.run_migrations() + + +def run_migrations_online(): + """Run migrations in 'online' mode. + + In this scenario we need to create an Engine + and associate a connection with the context. + + """ + connectable = engine_from_config( + config.get_section(config.config_ini_section), + prefix='sqlalchemy.', + poolclass=pool.NullPool) + + with connectable.connect() as connection: + context.configure( + connection=connection, + target_metadata=target_metadata + ) + + with context.begin_transaction(): + context.run_migrations() + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() diff --git a/alembic/env.pyc b/alembic/env.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3bd2c4eef54e829e1fbfa387212803900a0e1d50 GIT binary patch literal 1904 zcmZ{kTaOzx6vvMAXP!UB7|TUDpjghWhiQumyt7bk~nxgVeHw9 z)Jm1g3r~C=-uXs+0651pyWvuz#Ib#Rj(z_A`9yzAM<0)$yvm^cbm;%9w*c;U>g4nl zU;qjMO96t%9WWgzB3MSvI0Dm!q6fb zv3g*}up9%9VK;%@6!QL^Z2;yT6i2W;q6kN@o5Auvznxj&1a?cckZ#XbdePiT*%wzANhN_kyc zC$*K2%Jte%4v%C}=C~M*Bz<0NwUbRgb%jp)E%K0am#mH4)y_*Le@LE%^~&bbmNaAK zw?UD|DgbkZWGOIA0Hn?1& z%uD;oOT0uYEn-fm_~z{E=TD_C<%+^xDs6a&Hj{H@44uEcJ^li(F{?e!KapX3EVK{1 zZ8S%8z6#ryJQsF!S*7=0mbKs4z76V_5%-f(Cfj0hv#2ZMBbws>BfdLTZP2F$dX+1p zXi9TNd#sd6SK8+8Vm{<$d@U_C>=t~KwrRQN>q!ER@!cA_i6po&I4`Oi{e9}JkO)kD zrL0pKy9I!&+NOtl4!k9d*PMP$6LL@niQY~n?rop)7x=&ocq*6e1_vrz1V(u4e-bc6n58O{JT`tk+GPJ7FCGppg#FQ{> zMA0qBcXTV~%1j3vqhv8?-8_cBy^a=;aI^ag*ZSIx`E&^q3WvFM9Qhmq-vK{OR}b_+?T>ALoWbebl- zHk`G^u>qe`Kj_xT5mMx%n{q?gZx)RV3BQO&s~PoLZMf{FR?#bDH+SQj(yaV8T#wrC z(dJYxsi3YBeu_c7+eaI&BH?h!2W(@lw7Y!|DHqFwK%w`(7sXnCL2GM+U-ENyA5t?D ZGx1Q&x^Z{fpHP1$;$eImKbVM7=P!Cny+Z&1 literal 0 HcmV?d00001 diff --git a/alembic/script.py.mako b/alembic/script.py.mako new file mode 100644 index 0000000..2c01563 --- /dev/null +++ b/alembic/script.py.mako @@ -0,0 +1,24 @@ +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} + +""" +from alembic import op +import sqlalchemy as sa +${imports if imports else ""} + +# revision identifiers, used by Alembic. +revision = ${repr(up_revision)} +down_revision = ${repr(down_revision)} +branch_labels = ${repr(branch_labels)} +depends_on = ${repr(depends_on)} + + +def upgrade(): + ${upgrades if upgrades else "pass"} + + +def downgrade(): + ${downgrades if downgrades else "pass"} diff --git a/alembic/versions/131d925693a4_added_hnid_column.py b/alembic/versions/131d925693a4_added_hnid_column.py new file mode 100644 index 0000000..d7c3db6 --- /dev/null +++ b/alembic/versions/131d925693a4_added_hnid_column.py @@ -0,0 +1,28 @@ +"""added hnid column + +Revision ID: 131d925693a4 +Revises: d1f648fcd62a +Create Date: 2016-12-05 19:12:17.872699 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = '131d925693a4' +down_revision = 'd1f648fcd62a' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('found_items', sa.Column('hnid', sa.String(), nullable=True)) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column('found_items', 'hnid') + # ### end Alembic commands ### diff --git a/alembic/versions/131d925693a4_added_hnid_column.pyc b/alembic/versions/131d925693a4_added_hnid_column.pyc new file mode 100644 index 0000000000000000000000000000000000000000..056dd4b829592dabb8f2c77da6451fbff467f3b9 GIT binary patch literal 1030 zcmcJNL2sKd6vrQsqzP@MO50^OjvUrfXqz_CG)+oXX_siCJ4{@VgPn%Tm=v2%J!ju| zA7FdWK}kNq2J)Y08$bL1d-i^x`pkX!Q9$>&`2B)Se?#E_8L9)^1?fQLLgs=)eGjAu zSr2#*vOe%WWCI$^hE_+w2k;9Q`@oNEa7^Qm;gB**6vnyW!Ag}pC~DbO%J)<8TI#Y^ z!Pom~5MRZ7lH80ZSL}K_BlI-j@p629w=DQLVg9TU%!uF~Wf~-x@%TJW&M$9*cruNX zX?%NecbkkS6W=fY;1fEyt08ywdTvhed$@%iILyA|afE-)u<4H|2H+XZJCx|zBPta1 zV1tl6Fn!p#&V~p9dhOwYxj9B%)~(`sX+)(BjgoCcC{!yYTSyTOu#;i~YvKm_<)_xMo8DdTFi4^7kO^uj?%sy`b9EXtVH#e5)NCrrn{`> zw9B3EMr_?{CG6CqVX9c=SY;u#EiM;pp}5X#l@fuJ68uN+Lrnjvs(CBLXF9z;Lm4<{ N&Pi|J*w-5k{sJlK=L7%% literal 0 HcmV?d00001 diff --git a/alembic/versions/d1f648fcd62a_create_initial_state.py b/alembic/versions/d1f648fcd62a_create_initial_state.py new file mode 100644 index 0000000..b39e9cb --- /dev/null +++ b/alembic/versions/d1f648fcd62a_create_initial_state.py @@ -0,0 +1,34 @@ +"""create initial state + +Revision ID: d1f648fcd62a +Revises: +Create Date: 2016-12-05 18:37:48.117437 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = 'd1f648fcd62a' +down_revision = None +branch_labels = None +depends_on = None + + +def upgrade(): + op.create_table('trained_items', + sa.Column('name', sa.String, primary_key=True), + sa.Column('state', sa.Boolean)) + op.create_table('found_items', + sa.Column('id', sa.Integer, primary_key=True), + sa.Column('name', sa.String), + sa.Column('url', sa.String, unique=True, index=True), + sa.Column('date', sa.DateTime, index=True), + sa.Column('rating', sa.Float)) + pass + + +def downgrade(): + op.drop_table('trained_items') + op.drop_table('found_items') diff --git a/alembic/versions/d1f648fcd62a_create_initial_state.pyc b/alembic/versions/d1f648fcd62a_create_initial_state.pyc new file mode 100644 index 0000000000000000000000000000000000000000..288fc2f3e514068de4918acc17827ac5f7db6e37 GIT binary patch literal 1320 zcmcIkO;6iE5Z#RvlNeH=wnChckGVhyP(V>tRfSgK619Otm5a6UE@bI?9oB2woawLX z-|Df4c4q7ZdT$+hc0N2aes9JJes-fzzutdNS$#F&d<%E+5rkxH0+KV?V1~1-!6psH z(GHji*rds1lTBJ^PS!}aNp_fQvG0%^F&UDwj{eKu8dk<$sZ+wMSX=G2Fmd6*ilVW) z*M-h)d~rUE<>31G==3_3#|NT{sA3pLXI17oBn;z&{lW3>;9z(ERXjKy9-a)3PWJ|b zlcU3vDAK>->Tzi15mY`O`3*RqmR+GFY`zyr+$J$#m%GP9&!dIxz_>&iiS z+>v*AZj`X3Us$hhl=E%yW>4f$=to|4Tp{I=s$uSsL|*{+~9%5%NvG(F{5Lf=>4j{NyG% literal 0 HcmV?d00001 diff --git a/alembic/versions/d1f648fcd62a_create_initial_state.py~ b/alembic/versions/d1f648fcd62a_create_initial_state.py~ new file mode 100644 index 0000000..ec1113c --- /dev/null +++ b/alembic/versions/d1f648fcd62a_create_initial_state.py~ @@ -0,0 +1,25 @@ +"""create initial state + +Revision ID: d1f648fcd62a +Revises: +Create Date: 2016-12-05 18:37:48.117437 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = 'd1f648fcd62a' +down_revision = None +branch_labels = None +depends_on = None + + +def upgrade(): + + pass + + +def downgrade(): + op.drop_table('') diff --git a/alembic/versions/e07ebf603ff2_added_comment_count_column.py b/alembic/versions/e07ebf603ff2_added_comment_count_column.py new file mode 100644 index 0000000..f7952e1 --- /dev/null +++ b/alembic/versions/e07ebf603ff2_added_comment_count_column.py @@ -0,0 +1,28 @@ +"""added comment_count column + +Revision ID: e07ebf603ff2 +Revises: 131d925693a4 +Create Date: 2016-12-05 19:49:45.783201 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = 'e07ebf603ff2' +down_revision = '131d925693a4' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('found_items', sa.Column('comment_count', sa.Integer(), nullable=True)) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column('found_items', 'comment_count') + # ### end Alembic commands ### diff --git a/alembic/versions/e07ebf603ff2_added_comment_count_column.pyc b/alembic/versions/e07ebf603ff2_added_comment_count_column.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b9524379ffd3c98ec7e274e9b9fbaed82a62a9d6 GIT binary patch literal 1076 zcmchV&2HL25XZ+hIA9VfQhVvmM=CWjAAuB9Rh5V;AY70R6fPt#I2?N* zJxCfL8jv(WG$HXhnY65JgYe-eeH?%|vrdQeU%D;cxh9hdfg;SyqCjQRysVX>NY;f4 zf*7CHdR?mU{%#s#?-m!!ac{U>4t6W3r(u8C7n8wgJQ-#;!K}i}V0cGn8V-8>@l}6t z)f_-<&J3^5b{Rg|y)yWKmEl@ns~WpB!_nwZ6h^DEz^Im{%A%E$m{lsuBo>Qx z9z9{j$aQoIBeh+k!Y5tYALDxSY`AW{d8jghFUZx*6x;(ak0J}~B{>IRgz5l>--vT$NgicEpGm4lm%m0ATqB`%2 YQq&UPvkZMn#&<5A^M>!(pV#*P030y + + + + + + + +
+ + + + + + + + + + + + +%for x in items: + + + + + + + +%end + +
TitleDateScoreUp/DownComments
+ {{x.name}} + {{x.date.strftime("%Y-%m-%d %H:%M")}}{{"{0:.4g}".format(x.rating)}}Up / Down + + {{x.comment_count}} comments + +
+
+ + diff --git a/views/index.tpl~ b/views/index.tpl~ new file mode 100644 index 0000000..2cb77e8 --- /dev/null +++ b/views/index.tpl~ @@ -0,0 +1,57 @@ + + + + + + + + +
+ + + + + + + + + + + +%for x in items: + + + + + + + +%end + +
TitleDateScoreUp/Down
{{x.name}}{{x.date.strftime("%Y-%m-%d %H:%M")}}{{"{0:.4g}".format(x.rating)}}Up / Down
+
+ + \ No newline at end of file diff --git a/web.py b/web.py new file mode 100644 index 0000000..bead3be --- /dev/null +++ b/web.py @@ -0,0 +1,127 @@ +from db import * +from learn import * +import datetime + +import requests +from bs4 import BeautifulSoup + +import bottle +from threading import Thread +import schedule +import time + +def grab_all_data(): + session = Session() + Xs = [] + Ys = [] + for name, state in session.query(TrainedItem.name, TrainedItem.state): + Xs.append(name) + Ys.append(state) + return (Xs, Ys) + +def import_data(): + session = Session() + trained = load_files('data', shuffle=True) + + for i in range(len(trained.data)): + item = TrainedItem(name=trained.data[i].decode('utf8'), state=bool(trained.target[i])) + session.add(item) + + session.commit() + +# import_data() +clf = Classifier(grab_all_data) + +@bottle.route("/update") +def updateHN(): + print "Updating HN..." + session = Session() + resp = requests.get("https://news.ycombinator.com/") + soup = BeautifulSoup(resp.text, "lxml") + for t in soup.find_all('td', align=None, class_='title'): + parent_tr = t.parent + url = t.find('a', class_="storylink") + if url is not None: + url = url["href"] + print parent_tr + hnid = parent_tr["id"] + comment_count_text = unicode(parent_tr.next_sibling.find_all('a', href="item?id=" + hnid)[-1].text).replace(u"\xa0comments", "") + comment_count = 0 + try: + comment_count = int(comment_count_text) + except: + print repr(comment_count_text) + pass + check = session.query(FoundItem).filter(FoundItem.url == url).one_or_none() + print url, check + if check is None: + print url + item = FoundItem(name=t.text, + hnid=hnid, + comment_count=comment_count, + url=url, + date=datetime.datetime.now(), + rating=clf.scan(unicode(t.text))) + session.add(item) + else: + check.hnid = hnid + check.comment_count = comment_count + session.commit() + +class SchedThread(Thread): + def __init__(self): + Thread.__init__(self) + + def run(self): + while True: + schedule.run_pending() + time.sleep(1) + +@bottle.route('/') +@bottle.view('index.tpl') +def news(): + # load news from DB and display + session = Session() + + sortCol = 1 + + items = session.query(FoundItem) + + showUnder = bottle.request.params.get("all") == "true" + if not showUnder: + items = items.filter(FoundItem.rating > 0) + + if bottle.request.params.get("limit") == "week": + ago = datetime.datetime.now() - datetime.timedelta(days=7) + items = items.filter(FoundItem.date > ago) # past week + sortCol = 2 + elif bottle.request.params.get("limit") == "day": + ago = datetime.datetime.now() - datetime.timedelta(days=1) + items = items.filter(FoundItem.date > ago) # past day + sortCol = 2 + else: + items = items.order_by(FoundItem.date.desc()).limit(100) + + return dict(items=items, sortby=sortCol) + +@bottle.route('/rate/') +def rate(id): + session = Session() + rating = bottle.request.params.get('rating') == "good" + item = session.query(FoundItem).filter(FoundItem.id == id).one() + # insert or update + session.merge(TrainedItem(name=item.name, state=rating)) + session.commit() + clf.add(item.name, rating) + # re-rate all items in DB + for item in session.query(FoundItem): + item.rating = clf.scan(item.name) + session.commit() + bottle.redirect("/") + +if __name__ == "__main__": + schedule.every(10).minutes.do(updateHN) + st = SchedThread() + #st.daemon = True + st.start() + bottle.run(host="0.0.0.0",port=55512)