From 9751bf28b981d6c437f44517147ef2dcdeebac78 Mon Sep 17 00:00:00 2001 From: Christian Groschupp Date: Thu, 27 Mar 2025 10:18:52 +0100 Subject: [PATCH] feat: calculate saved space --- cleanmedia.py | 46 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 10 deletions(-) diff --git a/cleanmedia.py b/cleanmedia.py index 2a00720..58825d6 100755 --- a/cleanmedia.py +++ b/cleanmedia.py @@ -52,12 +52,14 @@ class File: media_id: MediaID, creation_ts: Timestamp, base64hash: Base64Hash, + file_size: int, ) -> None: """Initialize a File object.""" self.repo = media_repo self.media_id = media_id self.create_date = datetime.fromtimestamp(creation_ts) self.base64hash = base64hash + self.file_size = file_size @cached_property def fullpath(self) -> Path | None: @@ -163,7 +165,7 @@ class MediaRepository: for db_type, conn_string in connection_strings.items(): self.conn[db_type] = self.connect_db(conn_string) - def execute(self, query: str, params=(), db_type: str = "media_api"): + def _execute(self, query: str, params=(), db_type: str = "media_api"): paramstyle = getattr(self.conn, "paramstyle", "format") query = self._adjust_paramstyle(query, paramstyle) cur = self.conn[db_type].cursor() @@ -203,7 +205,7 @@ class MediaRepository: def get_single_media(self, mxid: MediaID) -> File | None: """Retrieve a single media file by ID.""" - cur = self.execute( + cur = self._execute( "SELECT media_id, creation_ts, base64hash from mediaapi_media_repository WHERE media_id = %s;", (mxid,), ) @@ -219,7 +221,7 @@ class MediaRepository: Returns: List of File objects """ - cur = self.execute( + cur = self._execute( "SELECT media_id, creation_ts, base64hash FROM mediaapi_media_repository WHERE user_id = %s;", (user_id,), ) @@ -234,12 +236,12 @@ class MediaRepository: Returns: List of File objects """ - query = """SELECT media_id, creation_ts, base64hash + query = """SELECT media_id, creation_ts, base64hash, file_size_bytes FROM mediaapi_media_repository""" if not local: query += " WHERE user_id = ''" - cur = self.execute(query) - return [File(self, row[0], row[1] // 1000, row[2]) for row in cur.fetchall()] + cur = self._execute(query) + return [File(self, row[0], row[1] // 1000, row[2], row[3]) for row in cur.fetchall()] def get_avatar_images(self) -> List[MediaID]: """Get media IDs of current avatar images. @@ -247,7 +249,7 @@ class MediaRepository: Returns: List of media IDs """ - cur = self.execute("SELECT avatar_url FROM userapi_profiles WHERE avatar_url > '';", db_type="user_api") + cur = self._execute("SELECT avatar_url FROM userapi_profiles WHERE avatar_url > '';", db_type="user_api") media_ids = [] for (url,) in cur.fetchall(): try: @@ -259,7 +261,7 @@ class MediaRepository: def sanity_check_thumbnails(self) -> None: """Check for orphaned thumbnail entries in database.""" - cur = self.execute( + cur = self._execute( """SELECT COUNT(media_id) FROM mediaapi_thumbnail WHERE NOT EXISTS (SELECT media_id FROM mediaapi_media_repository);""", ) @@ -292,8 +294,9 @@ class MediaRepository: for f in self.get_all_media(local) if f.media_id not in self._avatar_media_ids and f.create_date < cutoff_date ] - + file_size_counter = 0 for file in files_to_delete: + file_size_counter += file.file_size if dryrun: logging.info(f"Would delete file {file.media_id} at {file.fullpath}") if not file.exists(): @@ -302,7 +305,7 @@ class MediaRepository: file.delete() action = "Would have deleted" if dryrun else "Deleted" - logging.info("%s %d files", action, len(files_to_delete)) + logging.info("%s %d files, in total %s", action, len(files_to_delete), sizeof_fmt(file_size_counter)) return len(files_to_delete) @@ -348,6 +351,29 @@ def read_config(conf_file: Union[str, Path]) -> Tuple[Path, str, dict[str, str]] return Path(base_path), conns +def sizeof_fmt(num: Union[int, float], suffix: str = "B") -> str: + """ + Convert a number of bytes (or other units) into a human-readable format using binary prefixes. + + Args: + num (int | float): The number to format, typically a size in bytes. + suffix (str): The suffix to append (default is 'B' for bytes). + + Returns: + str: A string representing the human-readable size (e.g. '1.5MiB', '42.0KiB'). + + Notes: + Uses binary (base-1024) units: Ki, Mi, Gi, Ti, etc. + Automatically chooses the appropriate unit based on the input value. + """ + BASE = 1024.0 + + for unit in ("", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi"): + if abs(num) < BASE: + return f"{num:3.1f}{unit}{suffix}" + num /= 1024.0 + return f"{num:.1f}Yi{suffix}" + def parse_options() -> argparse.Namespace: """Parse command line arguments.