Tout Habr dans une seule base. Commentaires et application web

KDPV


Partie 1 | mega.nz | Démo en ligne | Github


Cet article est en quelque sorte la suite de celui dans lequel j'ai parsé Habr pour l'enregistrer dans une base de données. Il est maintenant temps de mettre cette base à profit.


Petites modifications qui facilitent la vie


Structure de la base de données


Nous accélérons la base de données en créant des index.


-- Index for article lookups by id (the web app selects with "WHERE id = ...").
CREATE INDEX article_indx ON articles(id);
-- Index for fetching one article's comments (the web app selects with "WHERE article = ...").
CREATE INDEX comments_indx ON comments(article);

Le temps de demande pour créer l'index est de 1 s.


Le temps de requête de la première ligne est de 5 ms.


— 10 .



, (SSD ).


- ( ). sqlite3worker . , , .


from sqlite3worker import Sqlite3Worker
from multiprocessing.dummy import Pool as ThreadPool
from datetime import datetime
import json
import requests
import logging

# Column definitions of the comments table, in the order the INSERT statement
# supplies its nine values.
_COMMENT_COLUMNS = (
    "id INTEGER",
    "parent_id INTEGER",
    "article INTEGER",
    "level INTEGER",
    "timePublished TEXT",
    "score INTEGER",
    "message TEXT",
    "children TEXT",
    "author TEXT",
)
_CREATE_COMMENTS_TABLE = "CREATE TABLE IF NOT EXISTS comments({})".format(
    ",".join(_COMMENT_COLUMNS)
)

# Single shared handle through which every worker thread submits its SQL.
sql_worker = Sqlite3Worker("habr.db")
sql_worker.execute(_CREATE_COMMENTS_TABLE)

def _comment_row(current, article_id):
    """Build the row for one comment in ``comments`` table column order.

    Args:
        current: Comment object taken from the API response.
        article_id: Id of the article the comment belongs to.

    Returns:
        A 9-tuple ``(id, parent_id, article, level, timePublished, score,
        message, children, author)``. ``author`` is ``None`` when the
        comment carries no author login.
    """
    author = current['author']
    try:
        login = str(author['login'])
    except (TypeError, KeyError):
        # Only the author lookup can fail here; keep the comment itself and
        # store a NULL author instead of discarding every field.
        logging.warning("[%s] Comment %s has no author login",
                        article_id, current['id'])
        login = None

    return (current['id'],
            current['parentId'],
            article_id,
            current['level'],
            current['timePublished'],
            current['score'],
            current['message'],
            str(list(current['children'])),
            login)


def worker(i, timeout=30):
    """Download the comments of article ``i`` and hand them to ``sql_worker``.

    Args:
        i: Article id.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        ``2`` when the comments could not be downloaded (network error,
        timeout or HTTP 503) or the body was not valid JSON; ``None``
        otherwise. Ids whose download failed are appended, one per line,
        to ``req_errors.txt``.
    """
    url = "https://m.habr.com/kek/v2/articles/{}/comments/?fl=ru%2Cen&hl=ru".format(i)

    # Catch only request failures: a bare ``except`` would also swallow
    # SystemExit and KeyboardInterrupt.
    download_failed = False
    try:
        r = requests.get(url, timeout=timeout)
    except requests.RequestException:
        logging.critical("requests error")
        download_failed = True
    else:
        if r.status_code == 503:
            logging.critical("503 Error")
            download_failed = True

    if download_failed:
        with open("req_errors.txt", "a") as file:
            # One id per line so the file can be read back unambiguously.
            file.write("{}\n".format(i))
        return 2

    try:
        data = json.loads(r.text)
    except ValueError:
        # Without a decoded body there is nothing to store for this article.
        logging.warning("[%s] Json loads failed", i)
        return 2

    if not data['success']:
        return None

    comments = data['data']['comments']
    for key in comments:
        row = _comment_row(comments[key], i)
        sql_worker.execute("INSERT INTO comments VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)", row)

    logging.info("Comments on article %s were parsed", i)
    return None

# Article ids to scan: the lower bound is included, the upper bound is not.
# Named so that the builtins ``min`` and ``max`` are not shadowed.
first_article_id = 490000
last_article_id = 495000
pool = ThreadPool(3)

start_time = datetime.now()
try:
    # One worker() return value per article id.
    results = pool.map(worker, range(first_article_id, last_article_id))
finally:
    # Release the threads and the database handle even when a worker raised.
    pool.close()
    pool.join()
    sql_worker.close()
print(datetime.now() - start_time)

dug .


.


-


, , -.
Flask, , .



Flask, .
DEBUG — — . .. , .


from flask import Flask, g, render_template, request, redirect, url_for
import sqlite3

# Path of the SQLite file the views read from.
DATABASE = "./habr.db"

app = Flask(__name__)
# NOTE(review): debug mode is on and the secret key is a literal in source;
# confirm both are overridden before the app is exposed publicly.
app.config.update(
    DEBUG=True,
    SECRET_KEY='supersecretkey',
)

-


.


def get_db():
    """Return the SQLite connection of the current application context.

    The connection is opened on first use and cached on ``flask.g`` so that
    repeated calls within the same context share it.
    """
    db = getattr(g, '_database', None)
    if db is None:
        db = g._database = sqlite3.connect(DATABASE)
    return db


@app.teardown_appcontext
def close_connection(exception):
    """Close the connection cached by ``get_db`` when the context ends.

    Args:
        exception: The unhandled exception that ended the context, if any
            (unused; required by the Flask teardown signature).
    """
    db = getattr(g, '_database', None)
    if db is not None:
        db.close()


. , . , .


@app.route("/")
def index():
    """Serve the landing page."""
    page = render_template("index.html")
    return page


/post/id, .


@app.route('/post/<int:post_id>/')
def show_post(post_id):
    """Render the stored article whose id is ``post_id``.

    Args:
        post_id: Article id taken from the URL.

    Returns:
        The rendered ``post.html`` page.

    Raises:
        werkzeug.exceptions.NotFound: When no article with that id is stored
            (Flask turns it into a 404 response).
    """
    # Local import: ``abort`` is not among the names imported at file top.
    from flask import abort

    cur = get_db().cursor()
    res = cur.execute("SELECT * FROM articles WHERE id = :id", {"id": post_id})
    article = res.fetchone()
    if article is None:
        # Without this guard the template is rendered with post=None.
        abort(404)

    return render_template("post.html", post=article)


/post/id page

. Flask html- .
templates html-, Jinja.


, , title , .


, {{ var }} = print(var) Python.


<!doctype html>
<html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, user-scalable=no, initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0">
        <meta http-equiv="X-UA-Compatible" content="ie=edge">
        <!-- Column 3 of the article row is used as the page title -->
        <title>{{ post[3] }}</title>
    </head>
    <body>

    </body>
</html>


.
.


<div class="header">
    <h1 class="display-4">{{ post[3] }}</h1>
    <br>
    <p class="">
        <strong>{{ date }}</strong>  <strong> {{ post[2] }}</strong>
    </p>

    <!-- Show the "Tutorial" mark only when column 9 of the article row equals 1 -->
    {% if post[9] == 1 %}
        <mark>Tutorial</mark> <br>
    {% endif %}

    <!-- Counters; judging by the icons: 6 - comments, 8 - rating, 7 - views (assumed, confirm against the articles table) -->
    <i class="fas fa-comments"></i> {{ post[6] }} |
    <i class="fas fa-arrows-alt-v"></i> {{ post[8] }} |
    <i class="fas fa-eye"></i> {{ post[7] }} 
</div>
    <div class="content">
        <div>
            <!-- safe turns off Jinja autoescaping so the stored HTML in column 4 renders as markup; the content must be trusted -->
            <p>{{ post[4]|safe }}</p>
        </div>
        <br>
        <strong><i class="fas fa-tags"></i> :</strong> {{ post[10] }}
    </div>
<!-- NOTE(review): this closing tag has no matching opening tag within this snippet -->
</div>


. comments.
,


@app.route('/post/<int:post_id>/')
def show_post(post_id):
    """Render an article together with its comments."""
    cur = get_db().cursor()

    # Elided in this excerpt; `article`, used below, is not assigned in the
    # visible lines.
    ...

    # Every comment row whose `article` column equals the requested post id.
    res = cur.execute("SELECT * FROM comments WHERE article = :id", {"id": post_id} )
    comments = res.fetchall()

    return render_template("post.html", post=article, comments=comments)

:


<div id="comments">
    <h2></h2>
    {% for comment in comments %}
        {# Columns follow the comments table: 0 id, 1 parent_id, 4 timePublished, 5 score, 6 message, 8 author #}
        {% set comment_id = comment[0] %}
        {% set parent_id = comment[1] %}
        <div id="{{ comment_id }}" class="shadow-sm p-3 mb-5 bg-light rounded">
            <div class="comment-header">
                <small>#{{ comment_id }}</small>
                <strong>{{ comment[8] }}</strong>
                <small>{{ comment[4] }} | <i class="fas fa-arrows-alt-v"></i> {{ comment[5] }}</small>
            </div>
            {# Replies (non-zero parent id) link to the anchor of their parent comment #}
            {% if parent_id != 0 %}
                <small><a href="#{{ parent_id }}"><mark>  #{{ parent_id }}</mark></a></small>
            {% endif %}
            {# The message is stored as HTML, hence the safe filter #}
            {{ comment[6]|safe }}
            <br>
        </div>
    {% endfor %}
</div>

Jinja {% %}


Index page

— .


, , .


POST- :
main.py


@app.route('/', methods=['POST'])
def index_post():
    """Redirect a submitted article address to the matching local post page.

    The post id is the last run of ASCII digits before any query string or
    fragment. Digits elsewhere in the address (host, port, company name,
    comment anchor) are therefore not merged into the id.

    Returns:
        A redirect to ``show_post`` when an id was found, otherwise the
        placeholder body ``" "``.
    """
    # Local import: ``re`` is not among the imports shown at file top.
    import re

    text = request.form['url']
    # Drop "?query" and "#fragment" so their digits cannot be taken for the id.
    location = re.split(r'[?#]', text, maxsplit=1)[0]
    digit_runs = re.findall(r'[0-9]+', location)
    if digit_runs:
        return redirect(url_for('show_post', post_id=int(digit_runs[-1])))
    return " "

index.html


...
<!-- No action attribute: the form posts its "url" field back to the page's own address -->
<form method="POST">
    <input type="url" name="url">
    <div>
        <button type="submit" id="search"></button>
    </div>
</form>
...

:


@app.route("/")
def index():
    """Render the landing page with the lowest and highest stored article id."""
    id_range_query = "SELECT min(id), max(id) FROM articles"
    counter = get_db().cursor().execute(id_range_query).fetchone()
    return render_template("index.html", counter=counter)

...
<div class="container">
        ...

        <!-- counter is the (min id, max id) row passed in by the index view -->
        <p>   = {{ counter[0] }}..{{ counter [1] }}</p>

        ...
</div>
...

:


-


  • , - . . , ISO 8601, .

: .


import dateutil.parser
from datetime import datetime
...
@app.route('/post/<int:post_id>/')
def show_post(post_id):
    ...
    # Column 1 of the article row is parsed as a timestamp
    # (presumably ISO 8601 text; confirm against the articles table).
    date = dateutil.parser.parse(article[1])
    # Reformat as day.month.year hour:minute for display in the template.
    date = datetime.strftime(date, "%d.%m.%Y %H:%M")
    ...
    return render_template("post.html", post=article, date=date, comments=comments)

  • lazy load , , :


    // Load images one after another: every <img> that carries a data-src
    // attribute gets its real source only once the previous one has settled.
    (async () => {
        for (const node of document.getElementsByTagName('img')) {
            const source = node.dataset.src;
            // Nothing to defer for images without data-src; awaiting them
            // would never settle and would block every image after them.
            if (source === undefined) {
                continue;
            }
            await new Promise(resolve => {
                node.onload = resolve;
                // A failed download must not stall the rest of the queue.
                node.onerror = resolve;
                node.src = source;
            });
        }
    })();

    , , , data-src


  • , "" :


    // Delegated click handler: toggles the spoiler text that belongs to the
    // clicked spoiler title.
    $(document).on('click', '.spoiler_title', function (event) {
        event.preventDefault();

        const $title = $(this);
        $title.toggleClass('active');

        const $text = $title.parent().find('.spoiler_text').first();
        $text.slideToggle(300);
    });

  • . , Darkmode.js


    <!-- Loads Darkmode.js 1.5.5 from the jsDelivr CDN, then shows its toggle widget -->
    <script src="https://cdn.jsdelivr.net/npm/darkmode-js@1.5.5/lib/darkmode-js.min.js"></script>
    <script>
    new Darkmode().showWidget();
    </script>



, - , - ( ).



( !) flask .


# Tell the flask command which module holds the application.
export FLASK_APP=main.py
# Select the production environment for the flask command.
export FLASK_ENV=production
flask run

, , , , - uWSGI, nginx.



— uWSGI, , , .


pip install uwsgi
# Serve the `app` object from main.py, speaking HTTP on port 3031 of every interface.
uwsgi --socket 0.0.0.0:3031 --protocol http --wsgi-file main.py --callable app

, — , nginx,



, , . , - .


, , ( , ).


github.


De plus, en raison de la faible puissance de mon VPS et d'un possible « habraeffet », le site peut être indisponible. Ce serait génial si quelqu'un mettait en place un serveur similaire.


All Articles