From f54223db41fb6031f0ae5d0868300e28ef7fe3f4 Mon Sep 17 00:00:00 2001 From: Roger Gonzalez Date: Fri, 28 Mar 2025 22:12:07 -0300 Subject: [PATCH] Fix: Remove timestamp-based processing check - The timestamp-based check for processed files was unreliable and prone to errors due to potential clock discrepancies. - This check has been removed, and processing now solely relies on the database status. --- src/subscleaner/subscleaner.py | 33 +------------------------ tests/test_subscleaner.py | 44 ---------------------------------- 2 files changed, 1 insertion(+), 76 deletions(-) diff --git a/src/subscleaner/subscleaner.py b/src/subscleaner/subscleaner.py index e080e14..3902be3 100755 --- a/src/subscleaner/subscleaner.py +++ b/src/subscleaner/subscleaner.py @@ -20,12 +20,10 @@ along with this program. If not, see <https://www.gnu.org/licenses/>. import argparse import hashlib -import os import pathlib import re import sqlite3 import sys -import time import chardet import pysrt @@ -229,27 +227,6 @@ def contains_ad(subtitle_line: str) -> bool: return any(pattern.search(subtitle_line) for pattern in AD_PATTERNS) -def is_processed_before(subtitle_file: pathlib.Path) -> bool: - """ - Check if the subtitle file has already been processed. - - Args: - subtitle_file (pathlib.Path): The path to the subtitle file. - - Returns: - bool: True if the subtitle file has already been processed, False otherwise. - """ - try: - file_creation_time = os.path.getctime(subtitle_file) - processed_timestamp = time.mktime( - time.strptime("2021-05-13 00:00:00", "%Y-%m-%d %H:%M:%S"), - ) - return file_creation_time < processed_timestamp - except Exception as e: - print(f"Error checking if file was processed before: {e}") - return False - - def get_encoding(subtitle_file: pathlib.Path) -> str: """ Detect the encoding of the subtitle file. @@ -297,8 +274,7 @@ def is_already_processed(subtitle_file, db_path, file_hash, force=False): """ Check if the subtitle file has already been processed. 
- This function checks both the database and the timestamp to determine - if a file has already been processed. + This function checks the database to determine if a file has already been processed. Args: subtitle_file (pathlib.Path): The path to the subtitle file. @@ -317,13 +293,6 @@ def is_already_processed(subtitle_file, db_path, file_hash, force=False): print(f"Already processed {subtitle_file} (hash match)") return True - # Check based on timestamp - if is_processed_before(subtitle_file): - print(f"Already processed {subtitle_file} (timestamp check)") - # Still mark it in the database - mark_file_processed(db_path, str(subtitle_file), file_hash) - return True - return False diff --git a/tests/test_subscleaner.py b/tests/test_subscleaner.py index 04b8240..8f8fa39 100644 --- a/tests/test_subscleaner.py +++ b/tests/test_subscleaner.py @@ -11,7 +11,6 @@ import pytest from src.subscleaner.subscleaner import ( contains_ad, get_encoding, - is_processed_before, main, process_subtitle_file, process_subtitle_files, @@ -99,23 +98,6 @@ def test_contains_ad(subtitle_line, expected_result): assert contains_ad(subtitle_line) is expected_result -def test_is_processed_before(tmpdir): - """ - Test the is_processed_before function. - - Args: - tmpdir (pytest.fixture): A temporary directory for creating the sample SRT file. - """ - subtitle_file = create_sample_srt_file(tmpdir, "") - subtitle_path = Path(subtitle_file) - - with patch("os.path.getctime", return_value=0): - assert is_processed_before(subtitle_path) is True - - with patch("os.path.getctime", return_value=9999999999): - assert is_processed_before(subtitle_path) is False - - def test_get_encoding(tmpdir, sample_srt_content): """ Test the get_encoding function. 
@@ -157,7 +139,6 @@ def test_process_subtitle_file_no_modification(tmpdir, sample_srt_content, mock_ """ subtitle_file = create_sample_srt_file(tmpdir, sample_srt_content) with ( - patch("src.subscleaner.subscleaner.is_processed_before", return_value=True), patch("src.subscleaner.subscleaner.is_file_processed", return_value=True), ): assert process_subtitle_file(subtitle_file, mock_db_path) is False @@ -174,7 +155,6 @@ def test_process_subtitle_file_with_modification(tmpdir, sample_srt_content, moc """ subtitle_file = create_sample_srt_file(tmpdir, sample_srt_content) with ( - patch("src.subscleaner.subscleaner.is_processed_before", return_value=False), patch("src.subscleaner.subscleaner.is_file_processed", return_value=False), patch("src.subscleaner.subscleaner.get_file_hash", return_value="mockhash"), patch("src.subscleaner.subscleaner.mark_file_processed"), @@ -272,7 +252,6 @@ def test_process_files_with_special_chars(special_chars_temp_dir, sample_srt_con special_files = create_special_char_files(special_chars_temp_dir, sample_srt_content) with ( - patch("src.subscleaner.subscleaner.is_processed_before", return_value=False), patch("src.subscleaner.subscleaner.is_file_processed", return_value=False), patch("src.subscleaner.subscleaner.get_file_hash", return_value="mockhash"), patch("src.subscleaner.subscleaner.mark_file_processed"), @@ -305,27 +284,6 @@ def test_get_encoding_with_special_chars(special_chars_temp_dir, sample_srt_cont pytest.fail(f"get_encoding raised {e} with non-existent file") -def test_is_processed_before_with_special_chars(special_chars_temp_dir): - """ - Test is_processed_before function with special character filenames. 
- - Args: - special_chars_temp_dir: Temporary directory for special character files - """ - file_path = special_chars_temp_dir / "check_processed_ümlaut.srt" - with open(file_path, "w", encoding="utf-8") as f: - f.write("Test content") - - with patch("os.path.getctime", return_value=0): - assert is_processed_before(file_path) is True - - with patch("os.path.getctime", return_value=9999999999): - assert is_processed_before(file_path) is False - - non_existent_file = special_chars_temp_dir / "non_existent_ümlaut.srt" - assert is_processed_before(non_existent_file) is False - - def test_process_subtitle_file_with_special_chars(special_chars_temp_dir, sample_srt_content, mock_db_path): """ Test process_subtitle_file function with special character filenames. @@ -340,7 +298,6 @@ def test_process_subtitle_file_with_special_chars(special_chars_temp_dir, sample f.write(sample_srt_content) with ( - patch("src.subscleaner.subscleaner.is_processed_before", return_value=False), patch("src.subscleaner.subscleaner.is_file_processed", return_value=False), patch("src.subscleaner.subscleaner.get_file_hash", return_value="mockhash"), patch("src.subscleaner.subscleaner.mark_file_processed"), @@ -367,7 +324,6 @@ def test_file_saving_with_special_chars(special_chars_temp_dir, sample_srt_conte special_files = create_special_char_files(special_chars_temp_dir, sample_srt_content) with ( - patch("src.subscleaner.subscleaner.is_processed_before", return_value=False), patch("src.subscleaner.subscleaner.is_file_processed", return_value=False), patch("src.subscleaner.subscleaner.get_file_hash", return_value="mockhash"), patch("src.subscleaner.subscleaner.mark_file_processed"),