Semua Habr dalam satu basis. Komentar dan aplikasi web

KDPV


Bagian 1 | mega.nz | Demo online | Github


Ini adalah kelanjutan dari artikel sebelumnya (Bagian 1), di mana saya mem-parsing Habr ke dalam sebuah basis data. Sekarang saatnya memanfaatkan basis data tersebut.


Pengeditan kecil yang membuat hidup lebih mudah


Struktur basis data


Kami mempercepat database dengan membuat indeks.


CREATE INDEX article_indx ON articles(id);
CREATE INDEX comments_indx ON comments(article);

Waktu permintaan untuk membuat indeks adalah 1 detik.


Waktu permintaan baris pertama adalah 5 ms.


— 10 .



, (SSD ).


- ( ). sqlite3worker . , , .


from sqlite3worker import Sqlite3Worker
from multiprocessing.dummy import Pool as ThreadPool
from datetime import datetime
import json
import requests
import logging

# Shared handle to habr.db; worker() below sends its INSERT statements through it.
sql_worker = Sqlite3Worker("habr.db")
# Create the comments table on first run. The column order here is the order
# worker() uses for its positional INSERT:
# id, parent_id, article (article id), level, timePublished, score, message,
# children (stringified list), author (login).
sql_worker.execute("CREATE TABLE IF NOT EXISTS comments(id INTEGER,"
                   "parent_id INTEGER,"
                   "article INTEGER,"
                   "level INTEGER,"
                   "timePublished TEXT,"
                   "score INTEGER,"
                   "message TEXT,"
                   "children TEXT,"
                   "author TEXT)")

def worker(i):
    """Download the comments of article ``i`` and insert them into ``comments``.

    Args:
        i: Numeric article id; it is substituted into the comments URL and
            stored in the ``article`` column of every inserted row.

    Returns:
        2 when the HTTP request fails or the server answers 503 (the id is
        appended to ``req_errors.txt`` so it can be retried later); ``None``
        in every other case, including an undecodable JSON body.
    """
    url = "https://m.habr.com/kek/v2/articles/{}/comments/?fl=ru%2Cen&hl=ru".format(i)

    try:
        r = requests.get(url)
    except Exception:
        # Best effort: one failed download must not abort the whole pool run.
        r = None
    else:
        if r.status_code == 503:
            logging.critical("503 Error")
            # Treated like a failed request. Raising SystemExit here would
            # only kill this pool thread, not stop the script.
            r = None

    if r is None:
        logging.critical("requests error")
        with open("req_errors.txt", "a") as file:
            # One id per line; without the separator the ids run together.
            file.write("{}\n".format(i))
        return 2

    try:
        payload = json.loads(r.text)
    except ValueError:
        # json.JSONDecodeError is a ValueError; there is nothing to store.
        logging.warning("[{}] Json loads failed".format(i))
        return None

    if payload['success']:
        comments = payload['data']['comments']

        for key in comments:
            current = comments[key]

            try:
                login = str(current['author']['login'])
            except (TypeError, KeyError):
                # No usable author object: keep the comment itself and store
                # NULL for the author instead of writing an all-NULL row.
                login = None

            row = (current['id'],
                   current['parentId'],
                   i,
                   current['level'],
                   current['timePublished'],
                   current['score'],
                   current['message'],
                   str(list(current['children'])),
                   login)

            sql_worker.execute("INSERT INTO comments VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)", row)

        logging.info("Comments on article {} were parsed".format(i))

# Range of article ids to download: first_id inclusive, last_id exclusive.
# (Named so that the builtins min()/max() are not shadowed.)
first_id = 490000
last_id = 495000
pool = ThreadPool(3)

start_time = datetime.now()
try:
    results = pool.map(worker, range(first_id, last_id))
finally:
    # Always shut the pool down and close the database handle, even when a
    # worker raised, so the threads are joined and the handle is released.
    pool.close()
    pool.join()
    sql_worker.close()
print(datetime.now() - start_time)

dug .


.


-


, , -.
Flask, , .



Flask, .
DEBUG — — . .. , .


from flask import Flask, g, render_template, request, redirect, url_for
import sqlite3

app = Flask(__name__)
# NOTE(review): debug mode is switched on unconditionally here — confirm it is
# turned off for any publicly reachable deployment.
app.config['DEBUG'] = True
# NOTE(review): hard-coded placeholder key; presumably replaced by a real
# secret outside this example — verify.
app.config['SECRET_KEY'] = 'supersecretkey'

# Path of the SQLite file opened by get_db().
DATABASE = "./habr.db"

-


.


def get_db():
    """Return the SQLite connection cached on ``g``, opening it on first use."""
    if getattr(g, '_database', None) is None:
        # Nothing cached yet: connect and remember the connection on ``g``.
        g._database = sqlite3.connect(DATABASE)
    return g._database


. , . , .


@app.route("/")
def index():
    """Serve the landing page rendered from ``index.html``."""
    page = render_template("index.html")
    return page


/post/id, .


@app.route('/post/<int:post_id>/')
def show_post(post_id):
    """Render ``post.html`` for the article whose id is ``post_id``.

    Responds with HTTP 404 when no such article exists, instead of rendering
    the template with ``post=None``.
    """
    # Local import: the file-level flask import list does not include abort.
    from flask import abort

    cur = get_db().cursor()
    res = cur.execute("SELECT * FROM articles WHERE id = :id", {"id": post_id})
    article = res.fetchone()
    if article is None:
        abort(404)

    return render_template("post.html", post=article)


/post/id page

. Flask html- .
templates html-, Jinja.


, , title , .


, {{ var }} = print(var) Python.


<!doctype html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, user-scalable=no, initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0">
    <meta http-equiv="X-UA-Compatible" content="ie=edge">
    {# The page title is taken from column 3 of the article row. #}
    <title>{{ post[3] }}</title>
</head>
<body>

</body>
</html>


.
.


<div class="header">
    <h1 class="display-4">{{ post[3] }}</h1>
    <br>
    <p class="">
        <strong>{{ date }}</strong>  <strong> {{ post[2] }}</strong>
    </p>

    <!-- Show the "Tutorial" badge only when post[9] equals 1 -->
    {% if post[9] == 1 %}
        <mark>Tutorial</mark> <br>
    {% endif %}

    <!-- Judging by the icons, post[6] is presumably the comment count, post[8] the score and post[7] the view count; verify against the articles table -->
    <i class="fas fa-comments"></i> {{ post[6] }} |
    <i class="fas fa-arrows-alt-v"></i> {{ post[8] }} |
    <i class="fas fa-eye"></i> {{ post[7] }} 
</div>
    <div class="content">
        <div>
            <!-- safe: post[4] is inserted without autoescaping, so HTML stored in it is rendered as markup -->
            <p>{{ post[4]|safe }}</p>
        </div>
        <br>
        <strong><i class="fas fa-tags"></i> :</strong> {{ post[10] }}
    </div>
<!-- NOTE(review): the closing tag below has no matching opening tag inside this fragment; presumably it closes a container opened elsewhere in post.html; verify -->
</div>


. comments.
,


@app.route('/post/<int:post_id>/')
def show_post(post_id):
    cur = get_db().cursor()

    # (elided in this excerpt; `article`, used below, is not assigned in the visible lines)
    ...

    # Fetch every comment row whose `article` column equals this post's id.
    res = cur.execute("SELECT * FROM comments WHERE article = :id", {"id": post_id} )
    comments = res.fetchall()

    # The template receives both the article row and its list of comment rows.
    return render_template("post.html", post=article, comments=comments)

:


{# Columns of a comment row: 0 id, 1 parent_id, 4 timePublished, 5 score, 6 message, 8 author. #}
<div id="comments">
    <h2></h2>
    {% for comment in comments %}
        <div id="{{ comment[0] }}" class="shadow-sm p-3 mb-5 bg-light rounded">
            <div class="comment-header">
                <small>#{{ comment[0] }}</small>
                <strong>{{ comment[8] }}</strong>
                <small>{{ comment[4] }} | <i class="fas fa-arrows-alt-v"></i> {{ comment[5] }}</small>
            </div>
            {# Replies (non-zero parent id) get a link back to the parent comment's anchor. #}
            {% if comment[1] != 0 %}
                <small><a href="#{{ comment[1] }}"><mark>  #{{ comment[1] }}</mark></a></small>
            {% endif %}
            {# The message is emitted unescaped in either case. #}
            {{ comment[6]|safe }}
            <br>
        </div>
    {% endfor %}
</div>

Jinja {% %}


Index page

— .


, , .


POST- :
main.py


def _extract_post_id(text):
    """Return the article id contained in ``text`` as a digit string, or ``''``.

    The first path segment that consists only of digits wins, so digits in the
    host, port, slug, query string or ``#fragment`` no longer leak into the
    id. When no such segment exists, fall back to joining every digit in the
    text, which keeps free-form inputs working.
    """
    # Drop the fragment and the query string before looking at path segments.
    path = text.split("#", 1)[0].split("?", 1)[0]
    for segment in path.split("/"):
        segment = segment.strip()
        if segment.isdigit():
            return segment
    return ''.join(ch for ch in text if ch.isdigit())


@app.route('/', methods=['POST'])
def index_post():
    """Handle the search form: redirect to the post page for the submitted URL.

    Reads the ``url`` form field, extracts the article id from it and
    redirects to ``show_post``; when no digits are found the original
    placeholder body is returned.
    """
    text = request.form['url']
    post_id = _extract_post_id(text)
    if post_id != '':
        return redirect(url_for('show_post', post_id=post_id))
    return " "

index.html


...
<form method="POST">
    <input type="url" name="url">
    <div>
        <button type="submit" id="search"></button>
    </div>
</form>
...

:


@app.route("/")
def index():
    """Render the landing page with the smallest and largest stored article id."""
    cursor = get_db().cursor()
    # A single aggregate row: (min(id), max(id)).
    id_bounds = cursor.execute("SELECT min(id), max(id) FROM articles").fetchone()
    return render_template("index.html", counter=id_bounds)

...
<div class="container">
        ...

        <p>   = {{ counter[0] }}..{{ counter [1] }}</p>

        ...
</div>
...

:


-


  • , - . . , ISO 8601, .

: .


import dateutil.parser
from datetime import datetime
...
@app.route('/post/<int:post_id>/')
def show_post(post_id):
    # (elided in this excerpt; `article` and `comments` are not assigned in the visible lines)
    ...
    # article[1] is presumably the publication timestamp (ISO 8601 is mentioned
    # in the surrounding text) — verify against the articles table.
    date = dateutil.parser.parse(article[1])
    # Reformat for display as DD.MM.YYYY HH:MM.
    date = datetime.strftime(date, "%d.%m.%Y %H:%M")
    ...
    return render_template("post.html", post=article, date=date, comments=comments)

  • lazy load , , :


    // Lazy-load images one after another: copy each <img>'s data-src into src
    // and wait until it has loaded (or failed) before starting the next one.
    (async () => {
        for (const node of document.getElementsByTagName('img')) {
            // Images without data-src have nothing to load. Awaiting them would
            // never resolve and would block every image that follows.
            if (node.dataset.src === undefined) {
                continue;
            }
            await new Promise(resolve => {
                node.onload = () => resolve();
                // A broken image must not stall the rest of the queue.
                node.onerror = () => resolve();
                node.src = node.dataset.src;
            });
        }
    })();

    , , , data-src


  • , "" :


    // Delegated click handler: toggle the 'active' class on a spoiler title and
    // slide the first .spoiler_text found under the title's parent open or shut.
    $(document).on('click', '.spoiler_title', function (event) {
        event.preventDefault();
        const title = $(this);
        title.toggleClass('active');
        title.parent().find('.spoiler_text').first().slideToggle(300);
    });

  • . , Darkmode.js


    <!-- Loads darkmode-js 1.5.5 from the jsDelivr CDN, then creates a Darkmode instance and shows its widget. -->
    <!-- NOTE(review): the widget is created as soon as this script runs; presumably the snippet sits at the end of <body>; verify. -->
    <script src="https://cdn.jsdelivr.net/npm/darkmode-js@1.5.5/lib/darkmode-js.min.js"></script>
    <script>
    new Darkmode().showWidget();
    </script>



, - , - ( ).



( !) flask .


export FLASK_APP=main.py
export FLASK_ENV=production
flask run

, , , , - uWSGI, nginx.



uWSGI, , , .


pip install uwsgi
uwsgi --socket 0.0.0.0:3031 --protocol http --wsgi-file main.py --callable app

, — , nginx,



, , . , - .


, , ( , ).


github.


Selain itu, karena VPS saya lemah dan ada kemungkinan lonjakan pengunjung, situs bisa saja tidak dapat diakses. Akan sangat bagus jika ada yang menjalankan server serupa.


All Articles