Fix: Remove timestamp-based processing check
- The timestamp-based check for processed files was unreliable and error-prone because of potential clock discrepancies.
- This check has been removed; processing now relies solely on the database status.
This commit is contained in:
parent
1c7fba5f5b
commit
f54223db41
@ -20,12 +20,10 @@ along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import hashlib
|
import hashlib
|
||||||
import os
|
|
||||||
import pathlib
|
import pathlib
|
||||||
import re
|
import re
|
||||||
import sqlite3
|
import sqlite3
|
||||||
import sys
|
import sys
|
||||||
import time
|
|
||||||
|
|
||||||
import chardet
|
import chardet
|
||||||
import pysrt
|
import pysrt
|
||||||
@ -229,27 +227,6 @@ def contains_ad(subtitle_line: str) -> bool:
|
|||||||
return any(pattern.search(subtitle_line) for pattern in AD_PATTERNS)
|
return any(pattern.search(subtitle_line) for pattern in AD_PATTERNS)
|
||||||
|
|
||||||
|
|
||||||
def is_processed_before(subtitle_file: pathlib.Path) -> bool:
|
|
||||||
"""
|
|
||||||
Check if the subtitle file has already been processed.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
subtitle_file (pathlib.Path): The path to the subtitle file.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
bool: True if the subtitle file has already been processed, False otherwise.
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
file_creation_time = os.path.getctime(subtitle_file)
|
|
||||||
processed_timestamp = time.mktime(
|
|
||||||
time.strptime("2021-05-13 00:00:00", "%Y-%m-%d %H:%M:%S"),
|
|
||||||
)
|
|
||||||
return file_creation_time < processed_timestamp
|
|
||||||
except Exception as e:
|
|
||||||
print(f"Error checking if file was processed before: {e}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
def get_encoding(subtitle_file: pathlib.Path) -> str:
|
def get_encoding(subtitle_file: pathlib.Path) -> str:
|
||||||
"""
|
"""
|
||||||
Detect the encoding of the subtitle file.
|
Detect the encoding of the subtitle file.
|
||||||
@ -297,8 +274,7 @@ def is_already_processed(subtitle_file, db_path, file_hash, force=False):
|
|||||||
"""
|
"""
|
||||||
Check if the subtitle file has already been processed.
|
Check if the subtitle file has already been processed.
|
||||||
|
|
||||||
This function checks both the database and the timestamp to determine
|
This function checks the database to determine if a file has already been processed.
|
||||||
if a file has already been processed.
|
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
subtitle_file (pathlib.Path): The path to the subtitle file.
|
subtitle_file (pathlib.Path): The path to the subtitle file.
|
||||||
@ -317,13 +293,6 @@ def is_already_processed(subtitle_file, db_path, file_hash, force=False):
|
|||||||
print(f"Already processed {subtitle_file} (hash match)")
|
print(f"Already processed {subtitle_file} (hash match)")
|
||||||
return True
|
return True
|
||||||
|
|
||||||
# Check based on timestamp
|
|
||||||
if is_processed_before(subtitle_file):
|
|
||||||
print(f"Already processed {subtitle_file} (timestamp check)")
|
|
||||||
# Still mark it in the database
|
|
||||||
mark_file_processed(db_path, str(subtitle_file), file_hash)
|
|
||||||
return True
|
|
||||||
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
@ -11,7 +11,6 @@ import pytest
|
|||||||
from src.subscleaner.subscleaner import (
|
from src.subscleaner.subscleaner import (
|
||||||
contains_ad,
|
contains_ad,
|
||||||
get_encoding,
|
get_encoding,
|
||||||
is_processed_before,
|
|
||||||
main,
|
main,
|
||||||
process_subtitle_file,
|
process_subtitle_file,
|
||||||
process_subtitle_files,
|
process_subtitle_files,
|
||||||
@ -99,23 +98,6 @@ def test_contains_ad(subtitle_line, expected_result):
|
|||||||
assert contains_ad(subtitle_line) is expected_result
|
assert contains_ad(subtitle_line) is expected_result
|
||||||
|
|
||||||
|
|
||||||
def test_is_processed_before(tmpdir):
|
|
||||||
"""
|
|
||||||
Test the is_processed_before function.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
tmpdir (pytest.fixture): A temporary directory for creating the sample SRT file.
|
|
||||||
"""
|
|
||||||
subtitle_file = create_sample_srt_file(tmpdir, "")
|
|
||||||
subtitle_path = Path(subtitle_file)
|
|
||||||
|
|
||||||
with patch("os.path.getctime", return_value=0):
|
|
||||||
assert is_processed_before(subtitle_path) is True
|
|
||||||
|
|
||||||
with patch("os.path.getctime", return_value=9999999999):
|
|
||||||
assert is_processed_before(subtitle_path) is False
|
|
||||||
|
|
||||||
|
|
||||||
def test_get_encoding(tmpdir, sample_srt_content):
|
def test_get_encoding(tmpdir, sample_srt_content):
|
||||||
"""
|
"""
|
||||||
Test the get_encoding function.
|
Test the get_encoding function.
|
||||||
@ -157,7 +139,6 @@ def test_process_subtitle_file_no_modification(tmpdir, sample_srt_content, mock_
|
|||||||
"""
|
"""
|
||||||
subtitle_file = create_sample_srt_file(tmpdir, sample_srt_content)
|
subtitle_file = create_sample_srt_file(tmpdir, sample_srt_content)
|
||||||
with (
|
with (
|
||||||
patch("src.subscleaner.subscleaner.is_processed_before", return_value=True),
|
|
||||||
patch("src.subscleaner.subscleaner.is_file_processed", return_value=True),
|
patch("src.subscleaner.subscleaner.is_file_processed", return_value=True),
|
||||||
):
|
):
|
||||||
assert process_subtitle_file(subtitle_file, mock_db_path) is False
|
assert process_subtitle_file(subtitle_file, mock_db_path) is False
|
||||||
@ -174,7 +155,6 @@ def test_process_subtitle_file_with_modification(tmpdir, sample_srt_content, moc
|
|||||||
"""
|
"""
|
||||||
subtitle_file = create_sample_srt_file(tmpdir, sample_srt_content)
|
subtitle_file = create_sample_srt_file(tmpdir, sample_srt_content)
|
||||||
with (
|
with (
|
||||||
patch("src.subscleaner.subscleaner.is_processed_before", return_value=False),
|
|
||||||
patch("src.subscleaner.subscleaner.is_file_processed", return_value=False),
|
patch("src.subscleaner.subscleaner.is_file_processed", return_value=False),
|
||||||
patch("src.subscleaner.subscleaner.get_file_hash", return_value="mockhash"),
|
patch("src.subscleaner.subscleaner.get_file_hash", return_value="mockhash"),
|
||||||
patch("src.subscleaner.subscleaner.mark_file_processed"),
|
patch("src.subscleaner.subscleaner.mark_file_processed"),
|
||||||
@ -272,7 +252,6 @@ def test_process_files_with_special_chars(special_chars_temp_dir, sample_srt_con
|
|||||||
special_files = create_special_char_files(special_chars_temp_dir, sample_srt_content)
|
special_files = create_special_char_files(special_chars_temp_dir, sample_srt_content)
|
||||||
|
|
||||||
with (
|
with (
|
||||||
patch("src.subscleaner.subscleaner.is_processed_before", return_value=False),
|
|
||||||
patch("src.subscleaner.subscleaner.is_file_processed", return_value=False),
|
patch("src.subscleaner.subscleaner.is_file_processed", return_value=False),
|
||||||
patch("src.subscleaner.subscleaner.get_file_hash", return_value="mockhash"),
|
patch("src.subscleaner.subscleaner.get_file_hash", return_value="mockhash"),
|
||||||
patch("src.subscleaner.subscleaner.mark_file_processed"),
|
patch("src.subscleaner.subscleaner.mark_file_processed"),
|
||||||
@ -305,27 +284,6 @@ def test_get_encoding_with_special_chars(special_chars_temp_dir, sample_srt_cont
|
|||||||
pytest.fail(f"get_encoding raised {e} with non-existent file")
|
pytest.fail(f"get_encoding raised {e} with non-existent file")
|
||||||
|
|
||||||
|
|
||||||
def test_is_processed_before_with_special_chars(special_chars_temp_dir):
|
|
||||||
"""
|
|
||||||
Test is_processed_before function with special character filenames.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
special_chars_temp_dir: Temporary directory for special character files
|
|
||||||
"""
|
|
||||||
file_path = special_chars_temp_dir / "check_processed_ümlaut.srt"
|
|
||||||
with open(file_path, "w", encoding="utf-8") as f:
|
|
||||||
f.write("Test content")
|
|
||||||
|
|
||||||
with patch("os.path.getctime", return_value=0):
|
|
||||||
assert is_processed_before(file_path) is True
|
|
||||||
|
|
||||||
with patch("os.path.getctime", return_value=9999999999):
|
|
||||||
assert is_processed_before(file_path) is False
|
|
||||||
|
|
||||||
non_existent_file = special_chars_temp_dir / "non_existent_ümlaut.srt"
|
|
||||||
assert is_processed_before(non_existent_file) is False
|
|
||||||
|
|
||||||
|
|
||||||
def test_process_subtitle_file_with_special_chars(special_chars_temp_dir, sample_srt_content, mock_db_path):
|
def test_process_subtitle_file_with_special_chars(special_chars_temp_dir, sample_srt_content, mock_db_path):
|
||||||
"""
|
"""
|
||||||
Test process_subtitle_file function with special character filenames.
|
Test process_subtitle_file function with special character filenames.
|
||||||
@ -340,7 +298,6 @@ def test_process_subtitle_file_with_special_chars(special_chars_temp_dir, sample
|
|||||||
f.write(sample_srt_content)
|
f.write(sample_srt_content)
|
||||||
|
|
||||||
with (
|
with (
|
||||||
patch("src.subscleaner.subscleaner.is_processed_before", return_value=False),
|
|
||||||
patch("src.subscleaner.subscleaner.is_file_processed", return_value=False),
|
patch("src.subscleaner.subscleaner.is_file_processed", return_value=False),
|
||||||
patch("src.subscleaner.subscleaner.get_file_hash", return_value="mockhash"),
|
patch("src.subscleaner.subscleaner.get_file_hash", return_value="mockhash"),
|
||||||
patch("src.subscleaner.subscleaner.mark_file_processed"),
|
patch("src.subscleaner.subscleaner.mark_file_processed"),
|
||||||
@ -367,7 +324,6 @@ def test_file_saving_with_special_chars(special_chars_temp_dir, sample_srt_conte
|
|||||||
special_files = create_special_char_files(special_chars_temp_dir, sample_srt_content)
|
special_files = create_special_char_files(special_chars_temp_dir, sample_srt_content)
|
||||||
|
|
||||||
with (
|
with (
|
||||||
patch("src.subscleaner.subscleaner.is_processed_before", return_value=False),
|
|
||||||
patch("src.subscleaner.subscleaner.is_file_processed", return_value=False),
|
patch("src.subscleaner.subscleaner.is_file_processed", return_value=False),
|
||||||
patch("src.subscleaner.subscleaner.get_file_hash", return_value="mockhash"),
|
patch("src.subscleaner.subscleaner.get_file_hash", return_value="mockhash"),
|
||||||
patch("src.subscleaner.subscleaner.mark_file_processed"),
|
patch("src.subscleaner.subscleaner.mark_file_processed"),
|
||||||
|
Loading…
x
Reference in New Issue
Block a user