كل هبر في قاعدة واحدة. التعليقات وتطبيق الويب

KDPV


الجزء الأول | mega.nz | عرض على الإنترنت | جيثب


هذا، على الأرجح، تكملة للمقال الذي حلّلتُ فيه هبر وحفظته في قاعدة بيانات. حان الوقت لاستخدامها.


تعديلات صغيرة تجعل الحياة أسهل


هيكل قاعدة البيانات


نقوم بتسريع قاعدة البيانات عن طريق إنشاء فهرس.


-- Index used when a single article is looked up by its id.
CREATE INDEX article_indx ON articles(id);
-- Index used when all comments of one article are selected.
CREATE INDEX comments_indx ON comments(article);

وقت الطلب لإنشاء الفهرس هو 1 ثانية.


وقت طلب السطر الأول هو 5 مللي ثانية.


— 10 .



, (SSD ).


- ( ). sqlite3worker . , , .


from sqlite3worker import Sqlite3Worker
from multiprocessing.dummy import Pool as ThreadPool
from datetime import datetime
import json
import requests
import logging

# Single Sqlite3Worker bound to habr.db; worker() sends all of its INSERTs
# through this object.
sql_worker = Sqlite3Worker("habr.db")
# Create the comments table on the first run. Column order matters: worker()
# inserts positional values in exactly this order. `article` holds the id of
# the article the comment belongs to; `children` holds a stringified list.
sql_worker.execute("CREATE TABLE IF NOT EXISTS comments(id INTEGER,"
                   "parent_id INTEGER,"
                   "article INTEGER,"
                   "level INTEGER,"
                   "timePublished TEXT,"
                   "score INTEGER,"
                   "message TEXT,"
                   "children TEXT,"
                   "author TEXT)")

def worker(i):
    """Download the comments of article ``i`` and insert them into ``comments``.

    Args:
        i: Numeric article id. It is substituted into the API URL and stored
            in the ``article`` column of every inserted row.

    Returns:
        ``2`` when the request failed (network error, timeout or HTTP 503);
        the id is then appended to ``req_errors.txt``, one id per line.
        ``None`` in every other case, including an undecodable or
        unsuccessful API response.
    """
    url = "https://m.habr.com/kek/v2/articles/{}/comments/?fl=ru%2Cen&hl=ru".format(i)

    try:
        # A timeout keeps one stalled connection from blocking a pool thread
        # forever; 30 seconds is an arbitrary but generous limit.
        r = requests.get(url, timeout=30)
    except requests.RequestException:
        logging.critical("requests error")
        request_failed = True
    else:
        # The original raised SystemExit on 503, but its bare ``except``
        # swallowed it, so the effective behaviour was "record the id and
        # return 2". That behaviour is kept, now explicitly.
        request_failed = r.status_code == 503
        if request_failed:
            logging.critical("503 Error")

    if request_failed:
        with open("req_errors.txt", "a") as file:
            # Newline-separated so the failed ids can be read back.
            file.write("{}\n".format(i))
        return 2

    try:
        payload = json.loads(r.text)
    except ValueError:
        # Without a decoded payload there is nothing to store.
        logging.warning("[{}] Json loads failed".format(i))
        return None

    if not (isinstance(payload, dict) and payload.get('success')):
        return None

    comments = payload['data']['comments']

    # Iterate keys and index back into the container (rather than calling
    # .values()) so an empty list in place of a mapping is still accepted.
    for key in comments:
        current = comments[key]

        author = current['author']
        try:
            login = str(author['login'])
        except (TypeError, KeyError):
            # Author information is missing: keep the comment itself and
            # store NULL for the author instead of discarding every field.
            login = None

        row = (current['id'],
               current['parentId'],
               i,
               current['level'],
               current['timePublished'],
               current['score'],
               current['message'],
               str(list(current['children'])),
               login)

        sql_worker.execute("INSERT INTO comments VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)", row)

    logging.info("Comments on article {} were parsed".format(i))

# Range of article ids to download: first_id is included, last_id is not.
# (Named this way so the builtins min() and max() are not shadowed.)
first_id = 490000
last_id = 495000
pool = ThreadPool(3)

start_time = datetime.now()
try:
    results = pool.map(worker, range(first_id, last_id))
finally:
    # Always shut the pool and the database worker down, even when a worker
    # raised and pool.map() re-raised the error here.
    pool.close()
    pool.join()
    sql_worker.close()
# Total wall-clock time of the run.
print(datetime.now() - start_time)

dug .


.


-


, , -.
Flask, , .



Flask, .
DEBUG — — . .. , .


from flask import Flask, g, render_template, request, redirect, url_for
import sqlite3

app = Flask(__name__)
# Debug mode is switched on here.
# NOTE(review): Flask's debugger must not be reachable from outside —
# confirm this is turned off on the public host.
app.config['DEBUG'] = True
# NOTE(review): hard-coded secret key, presumably a placeholder — load it
# from the environment before deploying.
app.config['SECRET_KEY'] = 'supersecretkey'

# Path of the SQLite database file that get_db() opens.
DATABASE = "./habr.db"

-


.


def get_db():
    """Return the SQLite connection stored on ``g``, opening it on first use."""
    if getattr(g, '_database', None) is None:
        # First call in this context: connect and remember the connection.
        g._database = sqlite3.connect(DATABASE)
    return g._database


. , . , .


@app.route("/")
def index():
    """Serve the start page by rendering the ``index.html`` template."""
    return render_template("index.html")


/post/id, .


@app.route('/post/<int:post_id>/')
def show_post(post_id):
    """Render the stored article whose id is ``post_id``.

    Args:
        post_id: Article id taken from the URL (converted to ``int`` by the
            route converter).

    Returns:
        The rendered ``post.html`` template with the article row as ``post``.
        Responds with HTTP 404 when the ``articles`` table has no such row,
        instead of rendering the template with ``post=None``.
    """
    from flask import abort  # local import: only this handler needs it

    cur = get_db().cursor()
    res = cur.execute("SELECT * FROM articles WHERE id = :id", {"id": post_id})
    article = res.fetchone()
    if article is None:
        abort(404)

    return render_template("post.html", post=article)


/post/id page

. Flask html- .
templates html-, Jinja.


, , title , .


, {{ var }} = print(var) Python.


<!doctype html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, user-scalable=no, initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0">
    <meta http-equiv="X-UA-Compatible" content="ie=edge">
    {# Column 3 of the article row is used as the page title. #}
    <title>{{ post[3] }}</title>
</head>
<body>

</body>
</html>


.
.


<div class="header">
    <h1 class="display-4">{{ post[3] }}</h1>
    <br>
    <p class="">
        <strong>{{ date }}</strong>  <strong> {{ post[2] }}</strong>
    </p>

    <!-- Show the Tutorial badge only when column 9 of the article row equals 1 -->
    {% if post[9] == 1 %}
        <mark>Tutorial</mark> <br>
    {% endif %}

    <!-- Judging by the icons: 6 - comment count, 8 - score, 7 - view count -->
    <i class="fas fa-comments"></i> {{ post[6] }} |
    <i class="fas fa-arrows-alt-v"></i> {{ post[8] }} |
    <i class="fas fa-eye"></i> {{ post[7] }} 
</div>
    <div class="content">
        <div>
            <!-- safe - marks the value as safe so it is not escaped; presumably column 4 is the stored article HTML -->
            <p>{{ post[4]|safe }}</p>
        </div>
        <br>
        <strong><i class="fas fa-tags"></i> :</strong> {{ post[10] }}
    </div>
</div>


. comments.
,


@app.route('/post/<int:post_id>/')
def show_post(post_id):
    cur = get_db().cursor()

    ...

    res = cur.execute("SELECT * FROM comments WHERE article = :id", {"id": post_id} )
    comments = res.fetchall()

    return render_template("post.html", post=article, comments=comments)

:


<div id="comments">
    <h2></h2>
    {# One card per comment row; the card's element id is the comment id, so a reply can link to its parent. #}
    {% for comment in comments %}
        <div id="{{ comment[0] }}" class="shadow-sm p-3 mb-5 bg-light rounded">
            <div class="comment-header">
                <small>#{{comment[0]}}</small>
                <strong>{{ comment[8] }}</strong>
                <small>{{comment[4]}} | <i class="fas fa-arrows-alt-v"></i> {{ comment[5] }}</small>
            </div>
            {# Replies (parent id other than 0) get a link to the parent comment. #}
            {% if comment[1] != 0 %}
                <small><a href="#{{comment[1]}}"><mark>  #{{ comment[1] }}</mark></a></small>
            {% endif %}
            {# The message itself is rendered the same way for replies and top-level comments. #}
            {{comment[6]|safe}}
            <br>
        </div>
    {% endfor %}
</div>

Jinja {% %}


Index page

— .


, , .


POST- :
main.py


@app.route('/', methods=['POST'])
def index_post():
    """Redirect to the article page for the URL submitted in the ``url`` field.

    The article id is taken from the first path segment that consists only
    of digits (e.g. ``.../post/123456/``), so digits elsewhere in the URL,
    such as a comment anchor, no longer end up glued onto the id. When the
    text has no such segment, all of its digits are joined, as before.

    Returns:
        A redirect to ``show_post`` when an id was found, otherwise the
        same placeholder response as before.
    """
    import re  # local import: only this handler needs it

    text = request.form['url']
    segment = re.search(r'/(\d+)(?=[/?#]|$)', text)
    if segment:
        post_id = segment.group(1)
    else:
        post_id = ''.join(re.findall(r'\d', text))

    if post_id != '':
        return redirect(url_for('show_post', post_id=post_id))
    return " "

index.html


...
<form method="POST">
    <input type="url" name="url">
    <div>
        <button type="submit" id="search"></button>
    </div>
</form>
...

:


@app.route("/")
def index():
    """Render the start page with the smallest and largest stored article id."""
    query = "SELECT min(id), max(id) FROM articles"
    # fetchone() returns a single (min, max) row, passed on as ``counter``.
    id_bounds = get_db().cursor().execute(query).fetchone()
    return render_template("index.html", counter=id_bounds)

...
<div class="container">
        ...

        <p>   = {{ counter[0] }}..{{ counter [1] }}</p>

        ...
</div>
...

:


-


  • , - . . , ISO 8601, .

: .


import dateutil.parser
from datetime import datetime
...
@app.route('/post/<int:post_id>/')
def show_post(post_id):
    ...
    date = dateutil.parser.parse(article[1])
    date = datetime.strftime(date, "%d.%m.%Y %H:%M")
    ...
    return render_template("post.html", post=article, date=date, comments=comments)

  • lazy load , , :


    // Load images one after another: copy data-src into src and wait for
    // each image to finish before starting the next one.
    (async () => {
        for (const node of document.getElementsByTagName('img')) {
            // An image without data-src has nothing to swap in. Skipping it
            // keeps the loop from waiting on a promise that never settles.
            if (node.dataset.src === undefined) {
                continue;
            }
            await new Promise(res => {
                // Attach the handlers before assigning src so no event can
                // fire before someone is listening.
                node.onload = () => res();
                // A broken image must not block the images after it.
                node.onerror = () => res();
                node.src = node.dataset.src;
            });
        }
    })();

    , , , data-src


  • , "" :


    // Delegated click handler: toggles the spoiler that belongs to the
    // clicked .spoiler_title element.
    $(document).on('click', '.spoiler_title', function (event) {
        event.preventDefault();
        const $title = $(this);
        $title.toggleClass('active');
        // Only the first .spoiler_text under the title's parent is toggled.
        const $body = $title.parent().find('.spoiler_text').first();
        $body.slideToggle(300);
    });

  • . , Darkmode.js


    <script src="https://cdn.jsdelivr.net/npm/darkmode-js@1.5.5/lib/darkmode-js.min.js"></script>
    <script>
    new Darkmode().showWidget();
    </script>



, - , - ( ).



( !) flask .


export FLASK_APP=main.py
export FLASK_ENV=production
flask run

, , , , - uWSGI, nginx.



uWSGI, , , .


pip install uwsgi
uwsgi --socket 0.0.0.0:3031 --protocol http --wsgi-file main.py --callable app

, — , nginx,



, , . , - .


, , ( , ).


github.


أيضًا، نظرًا لضعف خادم VPS الخاص بي واحتمال حدوث «تأثير هبر» (تدفّق مفاجئ للزوار)، فقد لا يكون الموقع متاحًا. سيكون من الرائع لو أنشأ أحدهم خادمًا مماثلًا.


All Articles