Alles Habr in einer Basis. Kommentare und Webanwendung

KDPV


Teil 1 | mega.nz | Online-Demo | Github


Dies ist sozusagen die Fortsetzung des Artikels, in dem ich Habr in eine Datenbank gepackt habe. Jetzt ist es an der Zeit, sie zu nutzen.


Kleine Änderungen, die das Leben leichter machen


Datenbankstruktur


Wir beschleunigen die Datenbank, indem wir einen Index erstellen.


-- Index articles by id: the web app looks up a single article with WHERE id = ?.
CREATE INDEX article_indx ON articles(id);
-- Index comments by owning article: the web app selects them with WHERE article = ?.
CREATE INDEX comments_indx ON comments(article);

Die Anforderungszeit zum Erstellen des Index beträgt 1 s.


Die Anforderungszeit der ersten Zeile beträgt 5 ms.


— 10 .



, (SSD ).


- ( ). sqlite3worker . , , .


from sqlite3worker import Sqlite3Worker
from multiprocessing.dummy import Pool as ThreadPool
from datetime import datetime
import json
import requests
import logging

# Column definitions of the comments table, listed in the order the INSERT
# statement supplies its values.
_COMMENT_COLUMNS = (
    "id INTEGER",
    "parent_id INTEGER",
    "article INTEGER",
    "level INTEGER",
    "timePublished TEXT",
    "score INTEGER",
    "message TEXT",
    "children TEXT",
    "author TEXT",
)

# Open the database worker on habr.db and make sure the target table exists.
sql_worker = Sqlite3Worker("habr.db")
sql_worker.execute(
    "CREATE TABLE IF NOT EXISTS comments(" + ",".join(_COMMENT_COLUMNS) + ")"
)

def _record_failed_request(i):
    """Append the id of an article whose download failed to req_errors.txt."""
    with open("req_errors.txt", "a") as file:
        # One id per line; without a separator consecutive ids run together.
        file.write("{}\n".format(i))


def worker(i):
    """Download the comments of article ``i`` and insert them into ``comments``.

    Args:
        i: Numeric article id used to build the API URL; also stored in the
            ``article`` column of every inserted row.

    Returns:
        2 when the article could not be fetched (network error or HTTP 503)
        or its body was not valid JSON; None otherwise.
    """
    url = "https://m.habr.com/kek/v2/articles/{}/comments/?fl=ru%2Cen&hl=ru".format(i)

    try:
        # A timeout keeps a stalled connection from blocking a pool thread.
        r = requests.get(url, timeout=30)
    except requests.RequestException:
        logging.critical("requests error")
        _record_failed_request(i)
        return 2

    if r.status_code == 503:
        # The original raised SystemExit here, but its bare ``except`` caught
        # it again, so the effective behaviour was "log, record, return 2".
        # That behaviour is kept, now stated explicitly.
        logging.critical("503 Error")
        _record_failed_request(i)
        return 2

    try:
        data = json.loads(r.text)
    except ValueError:
        # Previously execution continued with ``data`` unbound and crashed
        # with NameError on the next line.
        logging.warning("[{}] Json loads failed".format(i))
        return 2

    if not (isinstance(data, dict) and data.get('success')):
        return None

    comments = data['data']['comments']

    # Iterate by key and index back into the container, exactly as before, so
    # an empty container of any type is still handled.
    for key in comments:
        current = comments[key]

        comment_id = current['id']
        parent_id = current['parentId']
        level = current['level']
        time_published = current['timePublished']
        score = current['score']
        message = current['message']
        children = list(current['children'])
        author = current['author']

        try:
            row = (comment_id,
                   parent_id,
                   i,
                   level,
                   time_published,
                   score,
                   message,
                   str(children),
                   str(author['login']))
        except (TypeError, KeyError):
            # No usable author login: keep the original fallback of storing
            # an all-NULL placeholder row, but leave a trace in the log.
            logging.warning("[{}] comment {} has no author login".format(i, comment_id))
            row = (None, None, None, None, None, None, None, None, None)

        sql_worker.execute("INSERT INTO comments VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)", row)

    logging.info("Comments on article {} were parsed".format(i))

# Half-open range [first_id, last_id) of article ids whose comments are fetched.
# Named first_id/last_id so the builtins min() and max() are not shadowed.
first_id = 490000
last_id = 495000
pool = ThreadPool(3)

start_time = datetime.now()
try:
    results = pool.map(worker, range(first_id, last_id))
finally:
    # Shut down the pool and the database worker even when a task raised.
    pool.close()
    pool.join()
    sql_worker.close()
# Elapsed wall-clock time of the whole run.
print(datetime.now() - start_time)

dug .


.


-


, , -.
Flask, , .



Flask, .
DEBUG — — . .. , .


from flask import Flask, g, render_template, request, redirect, url_for
import sqlite3

import os

app = Flask(__name__)
# Debug mode and the secret key can be overridden from the environment so a
# deployment does not have to run with the values committed to the source.
# The fallbacks are the original hard-coded values, so behaviour is unchanged
# when the variables are not set. Set HABR_DEBUG=0 to switch debug mode off.
app.config['DEBUG'] = os.environ.get('HABR_DEBUG', '1') != '0'
app.config['SECRET_KEY'] = os.environ.get('SECRET_KEY', 'supersecretkey')

# Path of the SQLite database file opened by the request handlers.
DATABASE = "./habr.db"

-


.


def get_db():
    """Return the SQLite connection stored on ``g``, opening it on first use.

    The connection is kept in ``g._database``; when that attribute is missing
    or None a new connection to ``DATABASE`` is created and stored there.
    """
    connection = getattr(g, '_database', None)
    if connection is not None:
        return connection

    connection = sqlite3.connect(DATABASE)
    g._database = connection
    return connection


. , . , .


@app.route("/")
def index():
    """Serve the landing page rendered from index.html."""
    page = render_template("index.html")
    return page


/post/id, .


@app.route('/post/<int:post_id>/')
def show_post(post_id):
    """Render the stored article with id ``post_id``.

    Args:
        post_id: Article id taken from the URL.

    Returns:
        The rendered post.html page; the article row is passed as ``post``.
        Responds with HTTP 404 when no article with that id is stored.
    """
    # Local import: the name is only needed by this handler.
    from flask import abort

    cur = get_db().cursor()
    res = cur.execute("SELECT * FROM articles WHERE id = :id", {"id": post_id})
    article = res.fetchone()

    # fetchone() returns None for an unknown id; the template indexes into
    # the row, so answer with "not found" instead of rendering an empty page.
    if article is None:
        abort(404)

    return render_template("post.html", post=article)


/post/id page

. Flask html- .
templates html-, Jinja.


, , title , .


, {{ var }} = print(var) Python.


<!doctype html>
<html lang="en">
<head>
    {# Page metadata; the title is column 3 of the article row passed as "post". #}
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, user-scalable=no, initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0">
    <meta http-equiv="X-UA-Compatible" content="ie=edge">
    <title>{{ post[3] }}</title>
</head>
<body>

</body>
</html>


.
.


<div class="header">
    <h1 class="display-4">{{ post[3] }}</h1>
    <br>
    <p class="">
        <strong>{{ date }}</strong>  <strong> {{ post[2] }}</strong>
    </p>

    <!-- Show the "Tutorial" badge only when column 9 of the article row equals 1 -->
    {% if post[9] == 1 %}
        <mark>Tutorial</mark> <br>
    {% endif %}

    <!-- Columns 6, 8 and 7 are printed next to the comments, arrows and eye icons (presumably comment count, score and views - confirm against the articles schema) -->
    <i class="fas fa-comments"></i> {{ post[6] }} |
    <i class="fas fa-arrows-alt-v"></i> {{ post[8] }} |
    <i class="fas fa-eye"></i> {{ post[7] }} 
</div>
    <div class="content">
        <div>
            <!-- The "safe" filter outputs column 4 without HTML-escaping, so the stored article body is rendered as markup. NOTE(review): this assumes the stored HTML is trusted -->
            <p>{{ post[4]|safe }}</p>
        </div>
        <br>
        <strong><i class="fas fa-tags"></i> :</strong> {{ post[10] }}
    </div>
<!-- NOTE(review): no matching opening tag is visible in this fragment; presumably this closes a container opened elsewhere in the template - confirm -->
</div>


. comments.
,


@app.route('/post/<int:post_id>/')
def show_post(post_id):
    """Render an article together with all comments stored for it.

    ``post_id`` is the article id from the URL. The comment rows are passed
    to post.html as ``comments``.
    """
    cur = get_db().cursor()

    # Elided in this excerpt: the code that loads ``article`` (used below).
    ...

    # Every comment row whose ``article`` column matches the requested id.
    res = cur.execute("SELECT * FROM comments WHERE article = :id", {"id": post_id} )
    comments = res.fetchall()

    return render_template("post.html", post=article, comments=comments)

:


<div id="comments">
    <h2></h2>
    {# One card per comment row; the card's element id is the comment id so replies can link to it. #}
    {% for comment in comments %}
        <div id="{{ comment[0] }}" class="shadow-sm p-3 mb-5 bg-light rounded">
            <div class="comment-header">
                <small>#{{comment[0]}}</small>
                <strong>{{ comment[8] }}</strong>
                <small>{{comment[4]}} | <i class="fas fa-arrows-alt-v"></i> {{ comment[5] }}</small>
            </div>
            {# A non-zero parent id marks a reply: link back to the parent comment's card. #}
            {% if comment[1] != 0 %}
                <small><a href="#{{comment[1]}}"><mark>  #{{ comment[1] }}</mark></a></small>
            {% endif %}
            {# The message body is printed in both cases, unescaped. #}
            {{comment[6]|safe}}
            <br>
        </div>
    {% endfor %}
</div>

Jinja {% %}


Index page

— .


, , .


POST- :
main.py


@app.route('/', methods=['POST'])
def index_post():
    """Redirect the submitted article URL to the matching local post page.

    Reads the ``url`` form field and extracts the article id from it. The id
    is the last path segment made up only of ASCII digits, so digits in the
    host name, query string or fragment (for example ``#comment_123``) no
    longer leak into the id. Input without such a segment falls back to
    joining every ASCII digit it contains, which matches the previous
    behaviour.

    Returns:
        A redirect to ``show_post`` when an id was found, otherwise the
        string " ".
    """
    # Local imports: only this handler needs them.
    import re
    from urllib.parse import urlsplit

    text = request.form['url']

    try:
        path = urlsplit(text).path
    except ValueError:
        # urlsplit rejects some malformed input; treat the raw text as path.
        path = text

    numeric_segments = [
        segment for segment in path.split('/')
        if re.fullmatch(r'[0-9]+', segment)
    ]
    if numeric_segments:
        post_id = numeric_segments[-1]
    else:
        post_id = ''.join(re.findall(r'[0-9]', text))

    if post_id != '':
        return redirect(url_for('show_post', post_id=post_id))
    return " "

index.html


...
{# Sends the entered address as form field "url" in a POST request; the POST handler for "/" reads request.form['url']. #}
<form method="POST">
    <input type="url" name="url">
    <div>
        <button type="submit" id="search"></button>
    </div>
</form>
...

:


@app.route("/")
def index():
    """Render the landing page together with the range of stored article ids.

    The single row ``(min(id), max(id))`` from ``articles`` is handed to
    index.html as ``counter``.
    """
    cursor = get_db().cursor()
    id_bounds = cursor.execute("SELECT min(id), max(id) FROM articles").fetchone()
    return render_template("index.html", counter=id_bounds)

...
<div class="container">
        ...

        {# "counter" is the (min(id), max(id)) row supplied by the index view; the two values are printed as a range. #}
        <p>   = {{ counter[0] }}..{{ counter [1] }}</p>

        ...
</div>
...

:


-


  • , - . . , ISO 8601, .

: .


import dateutil.parser
from datetime import datetime
...
@app.route('/post/<int:post_id>/')
def show_post(post_id):
    """Render an article page with a display-formatted publication date.

    Excerpt: the parts that load ``article`` and ``comments`` are elided.
    """
    ...
    # Column 1 of the article row is parsed as a date/time string
    # (presumably ISO 8601 text - confirm against the articles schema).
    date = dateutil.parser.parse(article[1])
    # Re-format as DD.MM.YYYY HH:MM for the template.
    date = datetime.strftime(date, "%d.%m.%Y %H:%M")
    ...
    return render_template("post.html", post=article, date=date, comments=comments)

  • lazy load , , :


    // Load deferred images one at a time: every <img> that carries a data-src
    // attribute gets that value copied into src, and the next image is started
    // only after the current one has finished loading or failed.
    (async () => {
        for (const node of document.getElementsByTagName('img')) {
            const deferredSrc = node.dataset.src;

            // Images without data-src need no work. Awaiting a promise for them
            // (as before) never settled and stalled every image after them.
            if (deferredSrc === undefined) {
                continue;
            }

            await new Promise(resolve => {
                node.onload = () => resolve();
                // A broken image must not block the rest of the queue.
                node.onerror = () => resolve();
                node.src = deferredSrc;
            });
        }
    })();

    , , , data-src


  • , "" :


    // Delegated click handler for spoiler titles: flips the title's "active"
    // class and slides the first .spoiler_text inside the title's parent open
    // or closed over 300 ms.
    $(document).on('click', '.spoiler_title', function (event) {
        event.preventDefault();

        const $title = $(this);
        $title.toggleClass('active');

        const $body = $title.parent().find('.spoiler_text').first();
        $body.slideToggle(300);
    });

  • . , Darkmode.js


    <script src="https://cdn.jsdelivr.net/npm/darkmode-js@1.5.5/lib/darkmode-js.min.js"></script>
    <script>
    // Create a Darkmode instance with its default options and show its widget.
    new Darkmode().showWidget();
    </script>



, - , - ( ).



( !) flask .


# Point the flask command at main.py, select the production environment, then start the server.
export FLASK_APP=main.py
export FLASK_ENV=production
flask run

, , , , - uWSGI, nginx.



uWSGI, , , .


# Install uWSGI, then serve the "app" callable from main.py, speaking HTTP on port 3031 of all interfaces.
pip install uwsgi
uwsgi --socket 0.0.0.0:3031 --protocol http --wsgi-file main.py --callable app

, — , nginx,



, , . , - .


, , ( , ).


github.


Aufgrund meines schwachen VPS und eines möglichen Habraeffekts ist die Website möglicherweise nicht erreichbar. Es wäre großartig, wenn jemand einen ähnlichen Server aufsetzt.


All Articles