Bump version to 2.1.1

- Update __version__ in subscleaner/__init__.py.
- Update version in pyproject.toml.
- Update version in uv.lock.
Add verbose output option to README
2025-03-29 10:43:05 -03:00 · 2025-03-29 10:42:23 -03:00 · 2025-03-29 10:37:24 -03:00 · 2025-03-29 10:35:52 -03:00 · 2025-03-28 22:12:43 -03:00 · 2025-03-28 22:12:23 -03:00
6 changed files with 81 additions and 124 deletions
--- a/README.md
+++ b/README.md
@ -152,8 +152,8 @@ Subscleaner now uses a SQLite database to track processed files, which significa
 The SQLite database is stored in the following locations, depending on your operating system:
- **Linux**: `~/.local/share/subscleaner/subscleaner/subscleaner.db`
+- **Linux**: `~/.local/share/subscleaner/subscleaner.db`
- **macOS**: `~/Library/Application Support/subscleaner/subscleaner/subscleaner.db`
+- **macOS**: `~/Library/Application Support/subscleaner/subscleaner.db`
 - **Windows**: `C:\Users\<username>\AppData\Local\subscleaner\subscleaner\subscleaner.db`
 ### Command Line Options
@ -165,11 +165,13 @@ Several command line options are available:
 - `--reset-db`: Reset the database (remove all stored file hashes)
 - `--list-patterns`: List all advertisement patterns being used
 - `--version`: Show version information and exit
 - `-v`, `--verbose`: Increase output verbosity (show analyzing/skipping messages)
 Example usage:
 ```sh
 find /your/media/location -name "*.srt" | subscleaner --force
 find /your/media/location -name "*.srt" | subscleaner --db-location /path/to/custom/database.db
 find /your/media/location -name "*.srt" | subscleaner --verbose
 ```
 This feature makes Subscleaner more efficient, especially when running regularly via cron jobs or other scheduled tasks, as it will only process new or modified subtitle files.
--- a/pyproject.toml
+++ b/pyproject.toml
@ -1,6 +1,6 @@
 [project]
 name = "subscleaner"
-version = "2.0.0"
+version = "2.1.1"
 description = "Remove advertisements from subtitle files"
 authors = [
    {name = "Roger Gonzalez", email = "roger@rogs.me"}
--- a/src/subscleaner/__init__.py
+++ b/src/subscleaner/__init__.py
@ -1,3 +1,3 @@
 """Subscleaner package."""
-__version__ = "1.3.0"
+__version__ = "2.1.1"
--- a/src/subscleaner/subscleaner.py
+++ b/src/subscleaner/subscleaner.py
@ -20,12 +20,10 @@ along with this program.  If not, see <https://www.gnu.org/licenses/>.
 import argparse
 import hashlib
 import os
 import pathlib
 import re
 import sqlite3
 import sys
 import time
 import chardet
 import pysrt
@ -229,27 +227,6 @@ def contains_ad(subtitle_line: str) -> bool:
    return any(pattern.search(subtitle_line) for pattern in AD_PATTERNS)
 def is_processed_before(subtitle_file: pathlib.Path) -> bool:
    """
    Report whether a subtitle file's ctime is older than a fixed cutoff.
    The value returned by os.path.getctime is compared against the
    hard-coded timestamp "2021-05-13 00:00:00" converted with time.mktime.
    NOTE(review): os.path.getctime is the creation time on Windows but the
    last metadata change on Unix, so the result is platform dependent.
    Args:
        subtitle_file (pathlib.Path): The path to the subtitle file.
    Returns:
        bool: True if the file's ctime is earlier than the cutoff; False
        otherwise, including when any error occurs while reading the ctime
        or building the cutoff (the error is printed, not raised).
    """
    cutoff_text = "2021-05-13 00:00:00"
    try:
        ctime_seconds = os.path.getctime(subtitle_file)
        cutoff_seconds = time.mktime(
            time.strptime(cutoff_text, "%Y-%m-%d %H:%M:%S"),
        )
    except Exception as e:
        # Best-effort check: report the problem and treat the file as unprocessed.
        print(f"Error checking if file was processed before: {e}")
        return False
    return ctime_seconds < cutoff_seconds
 def get_encoding(subtitle_file: pathlib.Path) -> str:
    """
    Detect the encoding of the subtitle file.
@ -293,12 +270,11 @@ def remove_ad_lines(subtitle_data: pysrt.SubRipFile) -> bool:
    return modified
-def is_already_processed(subtitle_file, db_path, file_hash, force=False):
+def is_already_processed(subtitle_file, db_path, file_hash, force=False, verbose=False):
    """
    Check if the subtitle file has already been processed.
-    This function checks both the database and the timestamp to determine
+    This function checks the database to determine if a file has already been processed.
    if a file has already been processed.
    Args:
        subtitle_file (pathlib.Path): The path to the subtitle file.
@ -314,20 +290,14 @@ def is_already_processed(subtitle_file, db_path, file_hash, force=False):
    # Check if the file is in the database with the same hash
    if is_file_processed(db_path, str(subtitle_file), file_hash):
-        print(f"Already processed {subtitle_file} (hash match)")
+        if verbose:
-        return True
+            print(f"Already processed {subtitle_file} (hash match)")
    # Check based on timestamp
    if is_processed_before(subtitle_file):
        print(f"Already processed {subtitle_file} (timestamp check)")
        # Still mark it in the database
        mark_file_processed(db_path, str(subtitle_file), file_hash)
        return True
    return False
-def process_subtitle_file(subtitle_file_path: str, db_path, force=False) -> bool:
+def process_subtitle_file(subtitle_file_path: str, db_path, force=False, verbose=False) -> bool:
    """
    Process a subtitle file to remove ad lines.
@ -335,13 +305,15 @@ def process_subtitle_file(subtitle_file_path: str, db_path, force=False) -> bool
        subtitle_file_path (str): The path to the subtitle file.
        db_path (pathlib.Path): The path to the database file.
        force (bool): If True, process the file even if it has been processed before.
        verbose (bool): If True, print detailed processing information.
    Returns:
        bool: True if the subtitle file was modified, False otherwise.
    """
    try:
        subtitle_file = pathlib.Path(subtitle_file_path)
-        print(f"Analyzing: {subtitle_file}")
+        if verbose:
            print(f"Analyzing: {subtitle_file}")
        # Early validation checks
        if not subtitle_file.exists():
@ -390,7 +362,7 @@ def process_subtitle_file(subtitle_file_path: str, db_path, force=False) -> bool
        return False
-def process_subtitle_files(subtitle_files: list[str], db_path, force=False) -> list[str]:
+def process_subtitle_files(subtitle_files: list[str], db_path, force=False, verbose=False) -> list[str]:
    """
    Process multiple subtitle files to remove ad lines.
@ -398,25 +370,20 @@ def process_subtitle_files(subtitle_files: list[str], db_path, force=False) -> l
        subtitle_files (list[str]): A list of subtitle file paths.
        db_path (pathlib.Path): The path to the database file.
        force (bool): If True, process files even if they have been processed before.
        verbose (bool): If True, print detailed processing information.
    Returns:
        list[str]: A list of modified subtitle file paths.
    """
    modified_files = []
    for subtitle_file in subtitle_files:
-        if process_subtitle_file(subtitle_file, db_path, force):
+        if process_subtitle_file(subtitle_file, db_path, force, verbose):
            modified_files.append(subtitle_file)
    return modified_files
-def main():
+def _parse_args():
-    """
+    """Parse command line arguments."""
    Process subtitle files to remove ad lines.
    Read subtitle file paths from standard input, process each file to remove ad lines,
    and print the result. Keep track of the modified files and print
    a summary at the end.
    """
    parser = argparse.ArgumentParser(description="Remove advertisements from subtitle files.")
    parser.add_argument(
        "--db-location",
@ -427,19 +394,59 @@ def main():
    parser.add_argument("--version", action="store_true", help="Show version information and exit")
    parser.add_argument("--reset-db", action="store_true", help="Reset the database (remove all stored file hashes)")
    parser.add_argument("--list-patterns", action="store_true", help="List all advertisement patterns being used")
-    args = parser.parse_args()
+    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help="Increase output verbosity (show analyzing/skipping messages)",
    )
    return parser.parse_args()
 def _print_version():
    """
    Print the application version.
    The version is taken from subscleaner.__version__ when the package can be
    imported; if that import fails, it is looked up from the installed
    distribution metadata via importlib.metadata instead.
    """
    try:
        from subscleaner import __version__ as resolved_version
    except ImportError:
        # Fall back to the metadata of the installed "subscleaner" distribution.
        import importlib.metadata
        resolved_version = importlib.metadata.version("subscleaner")
    print(f"Subscleaner version {resolved_version}")
 def _reset_database(db_path):
    """
    Delete the database file, reporting the outcome on stdout.
    Args:
        db_path: Path object of the database file; the code calls exists()
            and unlink() on it.
    Prints a "not found" message when the file does not exist, a success
    message after deletion, or the error text if deletion fails. Errors are
    printed rather than raised.
    """
    if not db_path.exists():
        print(f"No database found at {db_path}")
        return
    try:
        db_path.unlink()
        print(f"Database reset successfully: {db_path}")
    except Exception as e:
        print(f"Error resetting database: {e}")
 def _list_patterns():
    """
    Print a numbered list of the configured advertisement patterns.
    Each entry of AD_PATTERNS is shown by its ``pattern`` attribute, numbered
    from 1, after a heading line.
    """
    print("Advertisement patterns being used:")
    for position, ad_pattern in enumerate(AD_PATTERNS, start=1):
        print(f"{position}. {ad_pattern.pattern}")
 def main():
    """
    Run the main entry point for the Subscleaner script.
    Parse arguments, handle special commands like version or reset-db,
    and processes subtitle files provided via stdin.
    """
    args = _parse_args()
    # Handle version request
    if args.version:
-        try:
+        _print_version()
            from subscleaner import __version__
            print(f"Subscleaner version {__version__}")
        except ImportError:
            import importlib.metadata
            version = importlib.metadata.version("subscleaner")
            print(f"Subscleaner version {version}")
        return
    # Get database path
@ -447,24 +454,15 @@ def main():
    # Handle reset database request
    if args.reset_db:
-        if db_path.exists():
+        _reset_database(db_path)
            try:
                db_path.unlink()
                print(f"Database reset successfully: {db_path}")
            except Exception as e:
                print(f"Error resetting database: {e}")
        else:
            print(f"No database found at {db_path}")
        return
    # Handle list patterns request
    if args.list_patterns:
-        print("Advertisement patterns being used:")
+        _list_patterns()
        for i, pattern in enumerate(AD_PATTERNS, 1):
            print(f"{i}. {pattern.pattern}")
        return
-    # Initialize database
+    # Initialize database if not resetting
    init_db(db_path)
    # Process subtitle files
@ -473,8 +471,9 @@ def main():
        print("No subtitle files provided. Pipe filenames to subscleaner or use --help for more information.")
        return
-    print("Starting script")
+    if args.verbose:
-    modified_files = process_subtitle_files(subtitle_files, db_path, args.force)
+        print("Starting script")
    modified_files = process_subtitle_files(subtitle_files, db_path, args.force, args.verbose)
    if modified_files:
        print(f"Modified {len(modified_files)} files")
    print("Done")
--- a/tests/test_subscleaner.py
+++ b/tests/test_subscleaner.py
@ -11,7 +11,6 @@ import pytest
 from src.subscleaner.subscleaner import (
    contains_ad,
    get_encoding,
    is_processed_before,
    main,
    process_subtitle_file,
    process_subtitle_files,
@ -99,23 +98,6 @@ def test_contains_ad(subtitle_line, expected_result):
    assert contains_ad(subtitle_line) is expected_result
 def test_is_processed_before(tmpdir):
    """
    Check is_processed_before against patched os.path.getctime values.
    Args:
        tmpdir (pytest.fixture): A temporary directory for creating the sample SRT file.
    """
    subtitle_path = Path(create_sample_srt_file(tmpdir, ""))
    # A ctime of 0 must count as processed; 9999999999 must not.
    for fake_ctime, expected in ((0, True), (9999999999, False)):
        with patch("os.path.getctime", return_value=fake_ctime):
            assert is_processed_before(subtitle_path) is expected
 def test_get_encoding(tmpdir, sample_srt_content):
    """
    Test the get_encoding function.
@ -157,7 +139,6 @@ def test_process_subtitle_file_no_modification(tmpdir, sample_srt_content, mock_
    """
    subtitle_file = create_sample_srt_file(tmpdir, sample_srt_content)
    with (
        patch("src.subscleaner.subscleaner.is_processed_before", return_value=True),
        patch("src.subscleaner.subscleaner.is_file_processed", return_value=True),
    ):
        assert process_subtitle_file(subtitle_file, mock_db_path) is False
@ -174,7 +155,6 @@ def test_process_subtitle_file_with_modification(tmpdir, sample_srt_content, moc
    """
    subtitle_file = create_sample_srt_file(tmpdir, sample_srt_content)
    with (
        patch("src.subscleaner.subscleaner.is_processed_before", return_value=False),
        patch("src.subscleaner.subscleaner.is_file_processed", return_value=False),
        patch("src.subscleaner.subscleaner.get_file_hash", return_value="mockhash"),
        patch("src.subscleaner.subscleaner.mark_file_processed"),
@ -211,8 +191,8 @@ def test_process_subtitle_files(tmpdir, sample_srt_content, mock_db_path):
        assert modified_subtitle_files == [subtitle_file1]
        assert mock_process.call_count == 2  # noqa PLR2004
        # Check that db_path was passed to process_subtitle_file
-        mock_process.assert_any_call(subtitle_file1, mock_db_path, False)
+        mock_process.assert_any_call(subtitle_file1, mock_db_path, False, False)
-        mock_process.assert_any_call(subtitle_file2, mock_db_path, False)
+        mock_process.assert_any_call(subtitle_file2, mock_db_path, False, False)
 def test_main_no_modification(tmpdir, sample_srt_content):
@ -233,7 +213,7 @@ def test_main_no_modification(tmpdir, sample_srt_content):
        patch("src.subscleaner.subscleaner.process_subtitle_files", return_value=[]) as mock_process_subtitle_files,
    ):
        main()
-        mock_process_subtitle_files.assert_called_once_with([subtitle_file], Path("/tmp/test_db.db"), False)
+        mock_process_subtitle_files.assert_called_once_with([subtitle_file], Path("/tmp/test_db.db"), False, False)
 def test_main_with_modification(tmpdir, sample_srt_content):
@ -257,7 +237,7 @@ def test_main_with_modification(tmpdir, sample_srt_content):
        ) as mock_process_subtitle_files,
    ):
        main()
-        mock_process_subtitle_files.assert_called_once_with([subtitle_file], Path("/tmp/test_db.db"), False)
+        mock_process_subtitle_files.assert_called_once_with([subtitle_file], Path("/tmp/test_db.db"), False, False)
 def test_process_files_with_special_chars(special_chars_temp_dir, sample_srt_content, mock_db_path):
@ -272,7 +252,6 @@ def test_process_files_with_special_chars(special_chars_temp_dir, sample_srt_con
    special_files = create_special_char_files(special_chars_temp_dir, sample_srt_content)
    with (
        patch("src.subscleaner.subscleaner.is_processed_before", return_value=False),
        patch("src.subscleaner.subscleaner.is_file_processed", return_value=False),
        patch("src.subscleaner.subscleaner.get_file_hash", return_value="mockhash"),
        patch("src.subscleaner.subscleaner.mark_file_processed"),
@ -305,27 +284,6 @@ def test_get_encoding_with_special_chars(special_chars_temp_dir, sample_srt_cont
        pytest.fail(f"get_encoding raised {e} with non-existent file")
 def test_is_processed_before_with_special_chars(special_chars_temp_dir):
    """
    Check is_processed_before on filenames containing non-ASCII characters.
    Args:
        special_chars_temp_dir: Temporary directory for special character files
    """
    file_path = special_chars_temp_dir / "check_processed_ümlaut.srt"
    with open(file_path, "w", encoding="utf-8") as handle:
        handle.write("Test content")
    # A ctime of 0 must count as processed; 9999999999 must not.
    for fake_ctime, expected in ((0, True), (9999999999, False)):
        with patch("os.path.getctime", return_value=fake_ctime):
            assert is_processed_before(file_path) is expected
    # Without the patch, a path that was never created must yield False.
    missing_file = special_chars_temp_dir / "non_existent_ümlaut.srt"
    assert is_processed_before(missing_file) is False
 def test_process_subtitle_file_with_special_chars(special_chars_temp_dir, sample_srt_content, mock_db_path):
    """
    Test process_subtitle_file function with special character filenames.
@ -340,7 +298,6 @@ def test_process_subtitle_file_with_special_chars(special_chars_temp_dir, sample
        f.write(sample_srt_content)
    with (
        patch("src.subscleaner.subscleaner.is_processed_before", return_value=False),
        patch("src.subscleaner.subscleaner.is_file_processed", return_value=False),
        patch("src.subscleaner.subscleaner.get_file_hash", return_value="mockhash"),
        patch("src.subscleaner.subscleaner.mark_file_processed"),
@ -367,7 +324,6 @@ def test_file_saving_with_special_chars(special_chars_temp_dir, sample_srt_conte
    special_files = create_special_char_files(special_chars_temp_dir, sample_srt_content)
    with (
        patch("src.subscleaner.subscleaner.is_processed_before", return_value=False),
        patch("src.subscleaner.subscleaner.is_file_processed", return_value=False),
        patch("src.subscleaner.subscleaner.get_file_hash", return_value="mockhash"),
        patch("src.subscleaner.subscleaner.mark_file_processed"),
@ -410,4 +366,4 @@ def test_main_with_special_chars(special_chars_temp_dir, sample_srt_content):
        ) as mock_process_subtitle_files,
    ):
        main()
-        mock_process_subtitle_files.assert_called_once_with([str(file_path)], Path("/tmp/test_db.db"), False)
+        mock_process_subtitle_files.assert_called_once_with([str(file_path)], Path("/tmp/test_db.db"), False, False)
--- a/uv.lock
+++ b/uv.lock
@ -456,7 +456,7 @@ wheels = [
 [[package]]
 name = "subscleaner"
-version = "1.3.0"
+version = "2.1.1"
 source = { editable = "." }
 dependencies = [
    { name = "appdirs" },
Author	SHA1	Message	Date
Roger Gonzalez	254e6b9c26	Bump version to 2.1.1 - Update __version__ in subscleaner/__init__.py. - Update version in pyproject.toml. - Update version in uv.lock.	2025-03-29 10:43:05 -03:00
Roger Gonzalez	5dbfbcf888	Add verbose output option to README	2025-03-29 10:42:23 -03:00
Roger Gonzalez	0a51edb883	Bump project version to 2.1.0 - Update project version in pyproject.toml. - Update __version__ in subscleaner/__init__.py. - Update version in uv.lock.	2025-03-29 10:37:24 -03:00
Roger Gonzalez	fb14e7f5f8	Add verbose flag and improve CLI - Added a verbose flag to control script output. - Updated tests to reflect changes. - Added docstrings to functions. - Improved overall code structure and readability.	2025-03-29 10:35:52 -03:00
Roger Gonzalez	7a463713c2	Bump version to 2.0.1	2025-03-28 22:12:43 -03:00
Roger Gonzalez	cda431aa85	Bump subscleaner version to 2.0.0	2025-03-28 22:12:23 -03:00
Roger Gonzalez	f54223db41	Fix: Remove timestamp-based processing check - The timestamp-based check for processed files was unreliable and prone to errors due to potential clock discrepancies. - This check has been removed, and processing now solely relies on the database status.	2025-03-28 22:12:07 -03:00
Roger Gonzalez	1c7fba5f5b	Update database location documentation - Clarifies the database location paths in README.md. - Removes redundant "subscleaner" directory name.	2025-03-28 21:28:55 -03:00
`@ -1,3 +1,3 @@`
	`"""Subscleaner package."""`	`"""Subscleaner package."""`

	`__version__ = "1.3.0"`	`__version__ = "2.1.1"`