Compare commits

...

2 Commits

Author SHA1 Message Date
Sebastian Spaeth
2886c1d06b add deletion of single media 2023-12-11 14:07:07 +01:00
Sebastian Spaeth
2ebf1a1e05 refactor
Put the main workhorse (cleaning out old media) in a function of the
MediaRepository, so that we can introduce alternative functions, e.g.
cleaning specific single media.
2023-12-11 13:27:31 +01:00

View File

@ -112,7 +112,17 @@ class MediaRepository:
raise ValueError(errstr) raise ValueError(errstr)
return psycopg2.connect(self.db_conn_string) return psycopg2.connect(self.db_conn_string)
def get_single_media(self, mxid: str) -> Optional[File]:
    """Look up a single media item by its media id.

    :params:
        :mxid: (str) the media id to look up
    :returns: a `File` for the matching row, or `None` if no such media exists.
    """
    with self.conn.cursor() as cur:
        # Parameterized query — never interpolate mxid into the SQL string.
        sql_str = "SELECT media_id, creation_ts, base64hash from mediaapi_media_repository WHERE media_id = %s;"
        cur.execute(sql_str, (mxid,))
        row = cur.fetchone()
        if row is None:
            return None
        # creation_ts is ms since the epoch, so convert to seconds
        return File(self, row[0], row[1] // 1000, row[2])
def get_all_media(self, local: bool = False) -> List[File]:
"""Return List[File] of remote media or ALL media if local==True""" """Return List[File] of remote media or ALL media if local==True"""
with self.conn.cursor() as cur: with self.conn.cursor() as cur:
# media_id | media_origin | content_type | file_size_bytes | creation_ts | upload_name | base64hash | user_id # media_id | media_origin | content_type | file_size_bytes | creation_ts | upload_name | base64hash | user_id
@ -154,6 +164,41 @@ class MediaRepository:
if row is not None and row[0]: if row is not None and row[0]:
logging.error("You have {} thumbnails in your db that do not refer to media. This needs fixing (we don't do that)!".format(row[0])) logging.error("You have {} thumbnails in your db that do not refer to media. This needs fixing (we don't do that)!".format(row[0]))
def clean_media_files(self, days: int, local: bool = False, dryrun: bool = False) -> int:
    """Clean out old media files from this repository.

    :params:
        :days: (int) delete media files older than N days.
        :local: (bool) Also delete media originating from local users
        :dryrun: (bool) Do not actually delete any files (just count)
    :returns: (int) The number of files that were/would be deleted
    """
    # Preps
    if local:
        # populate the cache of current avatar images so we don't delete them
        self.get_avatar_images()
    cleantime = datetime.today() - timedelta(days=days)
    logging.info("Deleting remote media older than %s", cleantime)
    num_deleted = 0
    # BUG FIX: use `self`, not the module-global `mr` — the refactor that moved
    # this code into the class left the globals behind, so the method only
    # worked when called from this script's __main__ block.
    files = self.get_all_media(local)
    for file in [f for f in files if f.media_id not in self._avatar_media_ids]:
        if file.create_date < cleantime:
            num_deleted += 1
            if dryrun:  # the great pretender
                logging.info(f"Pretending to delete file id {file.media_id} on path {file.fullpath}.")
                if not file.exists():
                    logging.info(f"File id {file.media_id} does not physically exist (path {file.fullpath}).")
            else:
                file.delete()
    info_str = "Deleted %d files during the run."
    if dryrun:
        info_str = "%d files would have been deleted during the run."
    logging.info(info_str, num_deleted)
    return num_deleted
# -------------------------------------------------------------- # --------------------------------------------------------------
def read_config(conf_file: Union[str, Path]) -> Tuple[Path, str]: def read_config(conf_file: Union[str, Path]) -> Tuple[Path, str]:
@ -195,6 +240,8 @@ def parse_options() -> argparse.Namespace:
prog='cleanmedia', prog='cleanmedia',
description='Deletes 30 day old remote media files from dendrite servers') description='Deletes 30 day old remote media files from dendrite servers')
parser.add_argument('-c', '--config', default="config.yaml", help="location of the dendrite.yaml config file.") parser.add_argument('-c', '--config', default="config.yaml", help="location of the dendrite.yaml config file.")
parser.add_argument('-m', '--mxid', dest="mxid",
help="Just delete media <MXID>. (no cleanup otherwise)")
parser.add_argument('-t', '--days', dest="days", parser.add_argument('-t', '--days', dest="days",
default="30", type=int, default="30", type=int,
help="Keep remote media for <DAYS> days.") help="Keep remote media for <DAYS> days.")
@ -217,28 +264,16 @@ if __name__ == '__main__':
args = parse_options()
(MEDIA_PATH, CONN_STR) = read_config(args.config)
mr = MediaRepository(MEDIA_PATH, CONN_STR)
if args.mxid:
    # Just clean a single media item, no general cleanup.
    file = mr.get_single_media(args.mxid)
    if file is None:
        logging.info("No media with id %s found", args.mxid)
    else:
        # BUG FIX: logging uses lazy %-style placeholders; the extra arg
        # without a %s raised a formatting error inside logging.
        logging.info("Found media with id %s", args.mxid)
        if not args.dryrun:
            file.delete()
else:
    # Sanity checks
    mr.sanity_check_thumbnails()  # warn in case of superfluous thumbnails
    # Clean out old files
    mr.clean_media_files(args.days, args.local, args.dryrun)