From 17f291dae52f626a362ee836650233f190b3e3ff Mon Sep 17 00:00:00 2001 From: SouthFox Date: Sun, 26 Mar 2023 01:52:04 +0800 Subject: [PATCH] feat/some feat and update --- .gitignore | 2 + BDSM/commands.py | 91 ++- BDSM/models.py | 26 +- BDSM/templates/base.html | 2 + BDSM/templates/settings.html | 17 +- BDSM/toot.py | 20 +- BDSM/views.py | 95 ++- Pipfile | 3 + Pipfile.lock | 523 ++++++++++-- README.md | 29 - README.org | 25 + misc/mask.png | Bin 0 -> 120593 bytes misc/stopwords.txt | 1477 ++++++++++++++++++++++++++++++++++ misc/user_dict.txt | 3 + requirements.txt | 19 +- 15 files changed, 2127 insertions(+), 205 deletions(-) delete mode 100644 README.md create mode 100644 README.org create mode 100644 misc/mask.png create mode 100644 misc/stopwords.txt create mode 100644 misc/user_dict.txt diff --git a/.gitignore b/.gitignore index dbb87a6..5e52409 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,8 @@ .idea *.log tmp/ +.venv/ +test.py *.py[cod] *.egg diff --git a/BDSM/commands.py b/BDSM/commands.py index bf8de89..91cdd86 100644 --- a/BDSM/commands.py +++ b/BDSM/commands.py @@ -2,7 +2,7 @@ import click from BDSM import app, db -from BDSM.models import Settings, Toot, Other +from BDSM.models import Settings, Toot from BDSM.toot import app_login, toot_process from mastodon import MastodonNotFoundError @@ -16,6 +16,87 @@ def initdb(drop): db.create_all() click.echo('Initialized database.') +@app.cli.command() +def analysis(): + """Analysis current Year""" + from BDSM.models import Toot + from sqlalchemy.sql import extract + from sqlalchemy import func + from sqlalchemy import desc + from . import db + from wordcloud import WordCloud + from PIL import Image + import numpy as np + import jieba + import re + + year_toots = Toot.query.filter(extract('year', Toot.created_at) == 2022) + print("2022 总计嘟文" + str(len(year_toots.all()))) + print("2022 年发言最多天数排名" + + str(db.session.query(func.strftime("%Y-%m-%d", Toot.created_at + ).label('date'),func.count('date') + ).filter(extract('year', Toot.created_at) == 2022 + ).group_by('date' + ).order_by(desc(func.count('date')) + ).all()[:3]) + ) + + print("2022 年互动最多帐号排名" + + str(db.session.query(Toot.acct.label('count'),func.count('count') + ).filter(extract('year', Toot.created_at) == 2022 + ).group_by('count' + ).order_by(desc(func.count('count')) + ).all()[:3]) + ) + + toots_counter = 0 + public_counter = 0 + toots_content = '' + + html_pattern = re.compile(r'<[^>]+>',re.S) + url_pattern = re.compile(r'https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()!@:%_\+.~#?&\/\/=]*)',re.S) + emoji_pattern = re.compile(r':+?[a-zA-Z0-9_]+:',re.S) + at_pattern = re.compile(r'@+?[a-zA-Z0-9\._]+ ',re.S) + mask = np.array(Image.open("misc/mask.png")) + + for i in year_toots: + if i.content != None: + toot_content = html_pattern.sub('', i.content) + toot_content = url_pattern.sub('', toot_content) + toot_content = emoji_pattern.sub('', toot_content) + toot_content = at_pattern.sub('', toot_content) + toots_content += toot_content + + toots_counter += 1 + if i.visibility == 'public': + public_counter += 1 + + print("2022 实际有内容嘟文数量:" + str(toots_counter)) + print("2022 公开嘟文数量" + str(public_counter)) + + jieba.load_userdict(r'misc/user_dict.txt') + wordlist = jieba.lcut(toots_content) + space_list = ' '.join(wordlist) + stopwords = set() + content = [line.strip() for line in open('misc/stopwords.txt','r', + encoding='utf-8').readlines()] + stopwords.update(content) + + wc = WordCloud(width=1400, height=2200, + background_color='white', + mask=mask, + stopwords=stopwords, + mode='RGB', + max_words=500, + max_font_size=150, + #relative_scaling=0.6, + font_path="/usr/share/fonts/noto-cjk/NotoSerifCJK-Regular.ttc", + random_state=50, + scale=2 + ).generate(space_list) + wc.to_file("output.png") + print(" 词图统计已生成在根目录,名字为 output.png") + @app.cli.command() def renderfile(): """render toot""" @@ -112,17 +193,15 @@ def renderfile(): def graball(): """Grab all toots context""" settings = Settings.query.first() - account = settings.account[1:] - username, domain = account.split("@") + domain = settings.domain url = "https://" + domain - mastodon, user = app_login(url) + mastodon, _ = app_login(url) acct = mastodon.me().acct toots = Toot.query.filter(Toot.in_reply_to_id.isnot(None)).all() toots_id = [] for i in toots: - if (Toot.query.get(i.in_reply_to_id) != None - or Other.query.get(i.in_reply_to_id) != None): + if (Toot.query.get(i.in_reply_to_id) != None): continue #context api excluding itself toots_id.append(i.id) diff --git a/BDSM/models.py b/BDSM/models.py index 07601ef..5a48936 100644 --- a/BDSM/models.py +++ b/BDSM/models.py @@ -26,29 +26,6 @@ class Toot(db.Model): favourites_count = db.Column(db.Integer) language = db.Column(db.Text) -class Other(db.Model): - id = db.Column(db.Integer, primary_key=True) - acct = db.Column(db.Text) - url = db.Column(db.Text) - created_at = db.Column(db.DateTime) - edited_at = db.Column(db.DateTime) - in_reply_to_id = db.Column(db.Integer) - in_reply_to_account_id = db.Column(db.Integer) - content = db.Column(db.Text) - media_list = db.Column(db.Text) - emoji_list = db.Column(db.Text) - spoiler_text = db.Column(db.Text) - poll_id = db.Column(db.Integer) - visibility = db.Column(db.Text) - reblogged = db.Column(db.Boolean) - favourited = db.Column(db.Boolean) - bookmarked = db.Column(db.Boolean) - sensitive = db.Column(db.Boolean) - replies_count = db.Column(db.Integer) - reblogs_count = db.Column(db.Integer) - favourites_count = db.Column(db.Integer) - language = db.Column(db.Text) - class Tag(db.Model): __table_args__ = {'sqlite_autoincrement': True} tag_id = db.Column(db.Integer, primary_key=True) @@ -79,6 +56,7 @@ class Poll(db.Model): options = db.Column(db.Text) class Settings(db.Model): - account = db.Column(db.Text, primary_key=True) + domain = db.Column(db.Text, primary_key=True) + account = db.Column(db.Text) timezone = db.Column(db.Text) setup = db.Column(db.Boolean) diff --git a/BDSM/templates/base.html b/BDSM/templates/base.html index 47ba1ae..64ad576 100644 --- a/BDSM/templates/base.html +++ b/BDSM/templates/base.html @@ -28,6 +28,8 @@