mastodon-BDSM/BDSM/toot.py

267 lines
9.6 KiB
Python
Raw Normal View History

2022-09-18 05:23:33 +02:00
#!/usr/bin/env python3
from mastodon import Mastodon
2022-11-08 19:21:50 +01:00
from tenacity import *
2022-09-18 05:23:33 +02:00
from BDSM import db
2022-11-07 15:42:20 +01:00
from BDSM.models import Other, Toot, Tag, Media, Emoji, Poll
2022-09-18 05:23:33 +02:00
import sys
import dateutil.parser
2022-09-18 05:23:33 +02:00
def app_register(url):
    """Register the ``pyBDSM`` client application with a Mastodon instance.

    Asks for the ``read`` scope only and lets Mastodon.py write the client
    credentials it receives to the relative path ``pyBDSM_clientcred.secret``.

    Args:
        url: Base URL of the instance to register with.
    """
    print("Registering app")
    registration = {
        'api_base_url': url,
        'to_file': 'pyBDSM_clientcred.secret',
        'scopes': ["read"],
    }
    Mastodon.create_app('pyBDSM', **registration)
2022-11-07 19:25:06 +01:00
def app_login(url):
    """Build an API client from the saved secrets and verify the login.

    The client id is read from ``pyBDSM_clientcred.secret`` and the access
    token from ``user.secret``. If verifying the credentials fails for any
    reason the process exits instead of returning.

    Args:
        url: Base URL of the Mastodon instance.

    Returns:
        A ``(mastodon, user)`` tuple: the API client and the account returned
        by ``account_verify_credentials()``.

    Raises:
        SystemExit: with status 0 when the access token was revoked, with
            status 1 for every other verification failure.
    """
    client = Mastodon(
        api_base_url=url,
        client_id='pyBDSM_clientcred.secret',
        access_token='user.secret',
    )
    try:
        account = client.account_verify_credentials()
    except Exception as e:
        reason = str(e)
        if "access token was revoked" in reason:
            print("revoked token")
            sys.exit(0)
        if "Name or service not known" in reason:
            print("Error: the instance name is either misspelled or offline",
                  file=sys.stderr)
        else:
            print(e, file=sys.stderr)
        # Every failure other than a revoked token ends with a non-zero status.
        sys.exit(1)
    return client, account
def get_context(url, toot_id):
    """Archive the conversation around a single toot.

    Logs in, fetches the context of ``toot_id``, stores its ancestors and
    descendants through ``toot_process`` and commits the database session.
    Only the ``ancestors`` and ``descendants`` lists of the context are
    stored here.

    Args:
        url: Base URL of the Mastodon instance.
        toot_id: Id of the status whose context should be archived.
    """
    mastodon, user = app_login(url)
    context = mastodon.status_context(toot_id)
    statuses = context['ancestors'] + context['descendants']
    # app_login() already returned the verified account, so its acct is
    # reused here instead of asking the server for the same account again.
    toot_process(statuses, user["acct"])
    db.session.commit()
2022-09-18 05:23:33 +02:00
2022-11-07 19:25:06 +01:00
# Status keys copied verbatim onto the stored Toot/Other row.
_COPIED_STATUS_FIELDS = (
    'url', 'created_at', 'in_reply_to_id', 'in_reply_to_account_id',
    'content', 'spoiler_text', 'visibility', 'reblogged', 'favourited',
    'bookmarked', 'sensitive', 'replies_count', 'reblogs_count',
    'favourites_count', 'language',
)


def _store_media(attachments):
    """Merge every media attachment; return their ids as ``"id,id,"`` text."""
    for item in attachments:
        db.session.merge(Media(id=item['id'], type=item['type'], url=item['url'],
                               remote_url=item['remote_url'],
                               description=item['description']))
    # Every id is followed by a comma (trailing comma included), which is
    # the format this column has always been written in.
    return "".join(f"{item['id']}," for item in attachments)


def _store_poll(poll):
    """Merge the poll, if any; return its id, or None when there is no poll."""
    if poll is None:
        return None
    db.session.merge(Poll(id=poll['id'], expires_at=poll['expires_at'],
                          multiple=poll['multiple'],
                          votes_count=poll['votes_count'],
                          options=str(poll['options'])))
    return poll['id']


def _store_emojis(emojis, content, acct, is_reblog):
    """Merge the custom emojis of a status; return ``"code,code,"`` text."""
    for emoji in emojis:
        shortcode = emoji['shortcode']
        if is_reblog:
            # Emojis seen in boosted statuses are recorded without a count.
            db.session.merge(Emoji(shortcode=shortcode, acct=acct,
                                   url=emoji['url'],
                                   static_url=emoji['static_url']))
            continue
        count = content.count(':' + shortcode + ':')
        existing = Emoji.query.filter_by(shortcode=shortcode, acct=acct).first()
        if existing is None:
            db.session.merge(Emoji(shortcode=shortcode, acct=acct,
                                   url=emoji['url'],
                                   static_url=emoji['static_url'],
                                   count=count))
        elif existing.count is None:
            existing.count = count
        else:
            # NOTE(review): a status that is processed again adds its
            # occurrences a second time -- confirm whether that is intended.
            existing.count += count
    return "".join(f"{emoji['shortcode']}," for emoji in emojis)


def toot_process(statuses, my_acct, duplicates_counter=0):
    """Store a batch of statuses and count the ones that were already stored.

    For a boost, a ``Toot`` row linking the boost to the boosted status is
    merged first. Boosts of the user's own statuses stop there; for any other
    boost the boosted status itself is then stored. Media, poll, emoji and
    tag rows are merged along the way. A status goes into ``Toot`` when it is
    a non-boost written by ``my_acct`` and into ``Other`` otherwise. Nothing
    is committed here.

    Args:
        statuses: Iterable of status dicts as returned by the Mastodon API.
        my_acct: ``acct`` of the logged-in user.
        duplicates_counter: Starting value of the duplicate count.

    Returns:
        ``duplicates_counter`` plus the number of stored statuses whose id
        was already present in ``Toot`` or ``Other``.
    """
    for status in statuses:
        boosted = status['reblog']
        is_reblog = boosted is not None
        if is_reblog:
            reblog_myself = my_acct == boosted['account']['acct']
            db.session.merge(Toot(id=status['id'],
                                  created_at=status['created_at'],
                                  reblog_myself=reblog_myself,
                                  reblog_id=boosted['id']))
            if reblog_myself:
                continue
            # From here on it is the boosted status that gets archived.
            status = boosted

        toot_id = status['id']
        acct = status['account']['acct']

        edited_at = status['edited_at'] if 'edited_at' in status else None
        if isinstance(edited_at, str):
            edited_at = dateutil.parser.parse(edited_at)

        media_list = _store_media(status['media_attachments'])
        poll_id = _store_poll(status['poll'])
        emoji_list = _store_emojis(status['emojis'], status['content'],
                                   acct, is_reblog)
        for tag in status['tags']:
            db.session.merge(Tag(id=toot_id, name=tag['name']))

        record = Other() if (is_reblog or my_acct != acct) else Toot()
        record.id = toot_id
        record.acct = acct
        record.edited_at = edited_at
        record.media_list = media_list
        record.poll_id = poll_id
        record.emoji_list = emoji_list
        for field in _COPIED_STATUS_FIELDS:
            setattr(record, field, status[field])

        # Look for an existing row before merging, otherwise the row being
        # added right now would be found by the lookup.
        if (Toot.query.get(toot_id) is not None
                or Other.query.get(toot_id) is not None):
            duplicates_counter += 1
        db.session.merge(record)
    return duplicates_counter
2022-09-18 05:23:33 +02:00
2022-11-08 19:21:50 +01:00
def archive_toot(url, archive_match):
    """Archive the logged-in user's statuses, favourites and/or bookmarks.

    Each selected timeline is fetched page by page; every page is stored with
    ``toot_process`` and committed before the next page is requested.

    Args:
        url: Base URL of the Mastodon instance.
        archive_match: Value tested with ``in`` for the markers
            ``'statuses'``, ``'favourites'`` and ``'bookmarks'`` (what to
            archive) and ``'duplicate'`` (stop a timeline early once a page
            contains ten or more already-stored statuses).
    """
    mastodon, user = app_login(url)
    # app_login() already returned the verified account; reuse it rather
    # than fetching the same account again.
    acct = user["acct"]
    skip_duplicates = 'duplicate' in archive_match

    @retry(stop=stop_after_attempt(5))
    def fetch_next_page(page):
        """Request the page after ``page``, giving up after five attempts."""
        return mastodon.fetch_next(page)

    def archive(statuses, statuses_count, skip_duplicates=True):
        """Store ``statuses`` and every following page, printing progress."""
        archived = 0
        # Stop on None (no further page) and also on an empty page: there is
        # nothing to store and nothing to continue paging from.
        while statuses:
            # NOTE(review): the duplicate count starts from zero on every
            # page, so the threshold below applies per page -- confirm that
            # a running total was not intended.
            duplicates = toot_process(statuses, acct)
            db.session.commit()
            archived += len(statuses)
            print(str(archived) + ' / ' + statuses_count)
            if skip_duplicates and duplicates >= 10:
                # Message: "ten duplicate toots detected, cancelling archive".
                print("检测到重复嘟文达到十次,取消存档……")
                break
            statuses = fetch_next_page(statuses)

    if 'statuses' in archive_match:
        archive(mastodon.account_statuses(user["id"], limit=20),
                str(user["statuses_count"]),
                skip_duplicates=skip_duplicates)

    # The totals of favourites and bookmarks are not known up front.
    if 'favourites' in archive_match:
        archive(mastodon.favourites(), '???', skip_duplicates=skip_duplicates)
    if 'bookmarks' in archive_match:
        archive(mastodon.bookmarks(), '???', skip_duplicates=skip_duplicates)