Fix: Remove timestamp-based processing check

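- The timestamp-based check for processed files (`is_processed_before`) was unreliable: it treated any file whose `os.path.getctime` value predates a hard-coded cutoff (2021-05-13 00:00:00, local time) as already processed, which is prone to errors due to potential clock discrepancies.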
- This check has been removed; whether a file has already been processed is now determined solely by the database status (hash match).
This commit is contained in:
Roger Gonzalez 2025-03-28 22:12:07 -03:00
parent 1c7fba5f5b
commit f54223db41
Signed by: rogs
GPG Key ID: C7ECE9C6C36EC2E6
2 changed files with 1 addition and 76 deletions

View File

@ -20,12 +20,10 @@ along with this program. If not, see <https://www.gnu.org/licenses/>.
import argparse import argparse
import hashlib import hashlib
import os
import pathlib import pathlib
import re import re
import sqlite3 import sqlite3
import sys import sys
import time
import chardet import chardet
import pysrt import pysrt
@ -229,27 +227,6 @@ def contains_ad(subtitle_line: str) -> bool:
return any(pattern.search(subtitle_line) for pattern in AD_PATTERNS) return any(pattern.search(subtitle_line) for pattern in AD_PATTERNS)
def is_processed_before(subtitle_file: pathlib.Path) -> bool:
"""
Check if the subtitle file has already been processed.
Args:
subtitle_file (pathlib.Path): The path to the subtitle file.
Returns:
bool: True if the subtitle file has already been processed, False otherwise.
"""
try:
file_creation_time = os.path.getctime(subtitle_file)
processed_timestamp = time.mktime(
time.strptime("2021-05-13 00:00:00", "%Y-%m-%d %H:%M:%S"),
)
return file_creation_time < processed_timestamp
except Exception as e:
print(f"Error checking if file was processed before: {e}")
return False
def get_encoding(subtitle_file: pathlib.Path) -> str: def get_encoding(subtitle_file: pathlib.Path) -> str:
""" """
Detect the encoding of the subtitle file. Detect the encoding of the subtitle file.
@ -297,8 +274,7 @@ def is_already_processed(subtitle_file, db_path, file_hash, force=False):
""" """
Check if the subtitle file has already been processed. Check if the subtitle file has already been processed.
This function checks both the database and the timestamp to determine This function checks the database to determine if a file has already been processed.
if a file has already been processed.
Args: Args:
subtitle_file (pathlib.Path): The path to the subtitle file. subtitle_file (pathlib.Path): The path to the subtitle file.
@ -317,13 +293,6 @@ def is_already_processed(subtitle_file, db_path, file_hash, force=False):
print(f"Already processed {subtitle_file} (hash match)") print(f"Already processed {subtitle_file} (hash match)")
return True return True
# Check based on timestamp
if is_processed_before(subtitle_file):
print(f"Already processed {subtitle_file} (timestamp check)")
# Still mark it in the database
mark_file_processed(db_path, str(subtitle_file), file_hash)
return True
return False return False

View File

@ -11,7 +11,6 @@ import pytest
from src.subscleaner.subscleaner import ( from src.subscleaner.subscleaner import (
contains_ad, contains_ad,
get_encoding, get_encoding,
is_processed_before,
main, main,
process_subtitle_file, process_subtitle_file,
process_subtitle_files, process_subtitle_files,
@ -99,23 +98,6 @@ def test_contains_ad(subtitle_line, expected_result):
assert contains_ad(subtitle_line) is expected_result assert contains_ad(subtitle_line) is expected_result
def test_is_processed_before(tmpdir):
"""
Test the is_processed_before function.
Args:
tmpdir (pytest.fixture): A temporary directory for creating the sample SRT file.
"""
subtitle_file = create_sample_srt_file(tmpdir, "")
subtitle_path = Path(subtitle_file)
with patch("os.path.getctime", return_value=0):
assert is_processed_before(subtitle_path) is True
with patch("os.path.getctime", return_value=9999999999):
assert is_processed_before(subtitle_path) is False
def test_get_encoding(tmpdir, sample_srt_content): def test_get_encoding(tmpdir, sample_srt_content):
""" """
Test the get_encoding function. Test the get_encoding function.
@ -157,7 +139,6 @@ def test_process_subtitle_file_no_modification(tmpdir, sample_srt_content, mock_
""" """
subtitle_file = create_sample_srt_file(tmpdir, sample_srt_content) subtitle_file = create_sample_srt_file(tmpdir, sample_srt_content)
with ( with (
patch("src.subscleaner.subscleaner.is_processed_before", return_value=True),
patch("src.subscleaner.subscleaner.is_file_processed", return_value=True), patch("src.subscleaner.subscleaner.is_file_processed", return_value=True),
): ):
assert process_subtitle_file(subtitle_file, mock_db_path) is False assert process_subtitle_file(subtitle_file, mock_db_path) is False
@ -174,7 +155,6 @@ def test_process_subtitle_file_with_modification(tmpdir, sample_srt_content, moc
""" """
subtitle_file = create_sample_srt_file(tmpdir, sample_srt_content) subtitle_file = create_sample_srt_file(tmpdir, sample_srt_content)
with ( with (
patch("src.subscleaner.subscleaner.is_processed_before", return_value=False),
patch("src.subscleaner.subscleaner.is_file_processed", return_value=False), patch("src.subscleaner.subscleaner.is_file_processed", return_value=False),
patch("src.subscleaner.subscleaner.get_file_hash", return_value="mockhash"), patch("src.subscleaner.subscleaner.get_file_hash", return_value="mockhash"),
patch("src.subscleaner.subscleaner.mark_file_processed"), patch("src.subscleaner.subscleaner.mark_file_processed"),
@ -272,7 +252,6 @@ def test_process_files_with_special_chars(special_chars_temp_dir, sample_srt_con
special_files = create_special_char_files(special_chars_temp_dir, sample_srt_content) special_files = create_special_char_files(special_chars_temp_dir, sample_srt_content)
with ( with (
patch("src.subscleaner.subscleaner.is_processed_before", return_value=False),
patch("src.subscleaner.subscleaner.is_file_processed", return_value=False), patch("src.subscleaner.subscleaner.is_file_processed", return_value=False),
patch("src.subscleaner.subscleaner.get_file_hash", return_value="mockhash"), patch("src.subscleaner.subscleaner.get_file_hash", return_value="mockhash"),
patch("src.subscleaner.subscleaner.mark_file_processed"), patch("src.subscleaner.subscleaner.mark_file_processed"),
@ -305,27 +284,6 @@ def test_get_encoding_with_special_chars(special_chars_temp_dir, sample_srt_cont
pytest.fail(f"get_encoding raised {e} with non-existent file") pytest.fail(f"get_encoding raised {e} with non-existent file")
def test_is_processed_before_with_special_chars(special_chars_temp_dir):
"""
Test is_processed_before function with special character filenames.
Args:
special_chars_temp_dir: Temporary directory for special character files
"""
file_path = special_chars_temp_dir / "check_processed_ümlaut.srt"
with open(file_path, "w", encoding="utf-8") as f:
f.write("Test content")
with patch("os.path.getctime", return_value=0):
assert is_processed_before(file_path) is True
with patch("os.path.getctime", return_value=9999999999):
assert is_processed_before(file_path) is False
non_existent_file = special_chars_temp_dir / "non_existent_ümlaut.srt"
assert is_processed_before(non_existent_file) is False
def test_process_subtitle_file_with_special_chars(special_chars_temp_dir, sample_srt_content, mock_db_path): def test_process_subtitle_file_with_special_chars(special_chars_temp_dir, sample_srt_content, mock_db_path):
""" """
Test process_subtitle_file function with special character filenames. Test process_subtitle_file function with special character filenames.
@ -340,7 +298,6 @@ def test_process_subtitle_file_with_special_chars(special_chars_temp_dir, sample
f.write(sample_srt_content) f.write(sample_srt_content)
with ( with (
patch("src.subscleaner.subscleaner.is_processed_before", return_value=False),
patch("src.subscleaner.subscleaner.is_file_processed", return_value=False), patch("src.subscleaner.subscleaner.is_file_processed", return_value=False),
patch("src.subscleaner.subscleaner.get_file_hash", return_value="mockhash"), patch("src.subscleaner.subscleaner.get_file_hash", return_value="mockhash"),
patch("src.subscleaner.subscleaner.mark_file_processed"), patch("src.subscleaner.subscleaner.mark_file_processed"),
@ -367,7 +324,6 @@ def test_file_saving_with_special_chars(special_chars_temp_dir, sample_srt_conte
special_files = create_special_char_files(special_chars_temp_dir, sample_srt_content) special_files = create_special_char_files(special_chars_temp_dir, sample_srt_content)
with ( with (
patch("src.subscleaner.subscleaner.is_processed_before", return_value=False),
patch("src.subscleaner.subscleaner.is_file_processed", return_value=False), patch("src.subscleaner.subscleaner.is_file_processed", return_value=False),
patch("src.subscleaner.subscleaner.get_file_hash", return_value="mockhash"), patch("src.subscleaner.subscleaner.get_file_hash", return_value="mockhash"),
patch("src.subscleaner.subscleaner.mark_file_processed"), patch("src.subscleaner.subscleaner.mark_file_processed"),