[Scummvm-git-logs] scummvm-sites integrity -> 3ac672af9cc0e2c2cab6cb25f2256a94111bd814
sev-
noreply at scummvm.org
Fri Jul 18 20:15:30 UTC 2025
This automated email contains information about 32 new commits which have been
pushed to the 'scummvm-sites' repo located at https://api.github.com/repos/scummvm/scummvm-sites .
Summary:
892d712467 INTEGRITY: Increase character limit size for log text
0a673255be INTEGRITY: Separate the additional checksum add logic from insert_filechecksum. Add new equal checksums for set.dat's fi
b16986fe7d INTEGRITY: Add filtering by platform for set.dat to reduce manual merge.
1cfe4abbae INTEGRITY: Avoid adding a fileset as candidate if it was marked as partial in the same run.
924d1d3adb INTEGRITY: Add additional filtering logic for glk engines
9f8befe92b INTEGRITY: Add timestamp field in scan.dat and filtering support via modification time
e7970559c0 INTEGRITY: Add all size variants to scan.dat - size, size-r and size-rd.
a2bfb8dd65 INTEGRITY: Fix clear database hang issue. Now the database is dropped and recreated.
9c32032912 INTEGRITY: Remove global database connection object from fileset.py, which is never closed.
835ad1fce5 INTEGRITY: Filter manual merge candidates if size mismatch.
37e8a37782 INTEGRITY: Add metadata for set.dat
5c90d3d63c INTEGRITY: Add navbar with logo.
fe850f82ee INTEGRITY: Add modification timestamps for macfiles
9c039e379a INTEGRITY: Add punycode encoding for scan utility.
9a3f55602c INTEGRITY: Fix the navbar on top.
b610e5f45f INTEGRITY: Limit match fileset to 1 in remove_manual_merge_if_size_mismatch
6a6847c2bf INTEGRITY: Improve console logging with progress update.
fa13ffdef0 INTEGRITY: Remove custom recursive path split function.
f5a99e4874 INTEGRITY: Use INFORMATION_SCHEMA.COLUMNS instead of relying on error handling for column migration.
7a32cd385d INTEGRITY: Add scan processing logic.
18deb0da65 INTEGRITY: Add additional modification-time column in file table.
d27e751324 INTEGRITY: Additional error handling while extracting keys from scummvm.dat.
db8ab76b5c INTEGRITY: Traverse set.dat instead of candidate fileset while searching mismatched files.
a683424205 INTEGRITY: Add checksum based filtering in set.dat, when possible.
b8675f477e INTEGRITY: Remove 'obsolete' fileset status entirely.
5891b9295d INTEGRITY: Add checksum based filtering before filtering by maximum number of files matched.
62af22a0b5 INTEGRITY: Merge one of the entries from dropped duplicate entries. Drop others.
d2aaa54f71 INTEGRITY: Merge filtering logic for glk with existing set.dat filtering.
fc88959d72 INTEGRITY: Add checksum filtering before max files filtering in scan.dat processing
4fe8d4f141 INTEGRITY: Parameterising all sql queries in db_functions.py
10dc79f415 INTEGRITY: Parametrise sql queries in all files.
3ac672af9c INTEGRITY: Run ruff formatter on compute_hash.py.
Commit: 892d7124675a3b207793a4a3b943339d6c94424d
https://github.com/scummvm/scummvm-sites/commit/892d7124675a3b207793a4a3b943339d6c94424d
Author: ShivangNagta (shivangnag at gmail.com)
Date: 2025-07-18T22:15:19+02:00
Commit Message:
INTEGRITY: Increase character limit size for log text
Changed paths:
schema.py
diff --git a/schema.py b/schema.py
index 09cbe7f..2ebfeeb 100644
--- a/schema.py
+++ b/schema.py
@@ -202,6 +202,10 @@ except Exception:
cursor.execute(
"ALTER TABLE file MODIFY COLUMN `size-r` BIGINT DEFAULT 0, MODIFY COLUMN `size-rd` BIGINT DEFAULT 0;"
)
+try:
+ cursor.execute("ALTER TABLE log ADD COLUMN `text` varchar(1000);")
+except Exception:
+ cursor.execute("ALTER TABLE log MODIFY COLUMN `text` varchar(1000);")
for index, definition in indices.items():
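The try/except pair above is the migration idiom used throughout schema.py: attempt ADD COLUMN, and if that fails because the column already exists, fall back to MODIFY COLUMN so re-running the script simply widens the existing column. A minimal sketch of the same idiom, assuming a pymysql cursor and a hypothetical ensure_column() helper:

import pymysql

def ensure_column(cursor, table, column, definition):
    """Create the column if it is missing, otherwise alter it to the new definition."""
    try:
        cursor.execute(f"ALTER TABLE {table} ADD COLUMN {column} {definition};")
    except pymysql.MySQLError:
        # Most likely error 1060 (duplicate column): widen the existing column instead.
        cursor.execute(f"ALTER TABLE {table} MODIFY COLUMN {column} {definition};")

# ensure_column(cursor, "log", "`text`", "varchar(1000)")  # mirrors the change above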
Commit: 0a673255be16e2a37110d06584e11a8f0e72240e
https://github.com/scummvm/scummvm-sites/commit/0a673255be16e2a37110d06584e11a8f0e72240e
Author: ShivangNagta (shivangnag at gmail.com)
Date: 2025-07-18T22:15:19+02:00
Commit Message:
INTEGRITY: Separate the additional checksum add logic from insert_filechecksum. Add new equal checksums for set.dat's fileset match
Changed paths:
db_functions.py
diff --git a/db_functions.py b/db_functions.py
index 45adc5d..de74d58 100644
--- a/db_functions.py
+++ b/db_functions.py
@@ -233,26 +233,34 @@ def insert_file(file, detection, src, conn):
cursor.execute("SET @file_last = LAST_INSERT_ID()")
-def insert_filechecksum(file, checktype, conn):
+def insert_filechecksum(file, checktype, file_id, conn):
if checktype not in file:
return
checksum = file[checktype]
checksize, checktype, checksum = get_checksum_props(checktype, checksum)
- query = f"INSERT INTO filechecksum (file, checksize, checktype, checksum) VALUES (@file_last, '{checksize}', '{checktype}', '{checksum}')"
+ query = "INSERT INTO filechecksum (file, checksize, checktype, checksum) VALUES (%s, %s, %s, %s)"
+ with conn.cursor() as cursor:
+ cursor.execute(query, (file_id, checksize, checktype, checksum))
+
+ add_all_equal_checksums(checksize, checktype, checksum, file_id, conn)
+
+
+def add_all_equal_checksums(checksize, checktype, checksum, file_id, conn):
+ """
+ We can update all the checksums when file size is less than the checksum size type, as all checksums are equal in that case.
+ """
with conn.cursor() as cursor:
- cursor.execute(query)
if "md5" not in checktype:
return
-
size_name = "size"
if checktype[-1] == "r":
size_name += "-rd"
if checktype[-1] == "s":
size_name += "-d"
- cursor.execute(f"SELECT `{size_name}` FROM file WHERE id = @file_last")
+ cursor.execute(f"SELECT `{size_name}` FROM file WHERE id = {file_id}")
result = cursor.fetchone()
if not result:
return
@@ -281,9 +289,10 @@ def insert_filechecksum(file, checktype, conn):
checksum_size = exploded.pop()
checksum_type = "-".join(exploded)
- query = "INSERT INTO filechecksum (file, checksize, checktype, checksum) VALUES (@file_last, %s, %s, %s)"
- with conn.cursor() as cursor:
- cursor.execute(query, (checksum_size, checksum_type, checksum))
+ query = "INSERT INTO filechecksum (file, checksize, checktype, checksum) VALUES (%s, %s, %s, %s)"
+ cursor.execute(
+ query, (file_id, checksum_size, checksum_type, checksum)
+ )
def delete_filesets(conn):
@@ -558,9 +567,13 @@ def db_insert(data_arr, username=None, skiplog=False):
for file in unique_files:
insert_file(file, detection, src, conn)
+ file_id = None
+ with conn.cursor() as cursor:
+ cursor.execute("SELECT @file_last AS file_id")
+ file_id = cursor.fetchone()["file_id"]
for key, value in file.items():
if key not in ["name", "size", "size-r", "size-rd", "sha1", "crc"]:
- insert_filechecksum(file, key, conn)
+ insert_filechecksum(file, key, file_id, conn)
if detection:
conn.cursor().execute(
@@ -1070,7 +1083,6 @@ def set_perform_match(
with conn.cursor() as cursor:
if len(candidate_filesets) == 1:
matched_fileset_id = candidate_filesets[0]
-
cursor.execute(
"SELECT status FROM fileset WHERE id = %s", (matched_fileset_id,)
)
@@ -1123,6 +1135,7 @@ def set_perform_match(
elif len(candidate_filesets) > 1:
found_match = False
+
for candidate_fileset in candidate_filesets:
(is_match, _) = is_full_checksum_match(candidate_fileset, fileset, conn)
if is_match:
@@ -1579,7 +1592,7 @@ def populate_file(fileset, fileset_id, conn, detection):
for key, value in file.items():
if key not in ["name", "size", "size-r", "size-rd", "sha1", "crc"]:
- insert_filechecksum(file, key, conn)
+ insert_filechecksum(file, key, file_id, conn)
if value in target_files_dict and not file_exists:
cursor.execute(
f"SELECT detection_type FROM file WHERE id = {target_files_dict[value]['id']}"
@@ -1683,7 +1696,10 @@ def set_populate_file(fileset, fileset_id, conn, detection):
cursor.execute("SET @file_last = LAST_INSERT_ID()")
cursor.execute("SELECT @file_last AS file_id")
- insert_filechecksum(file, "md5", conn)
+ cursor.execute("SELECT @file_last AS file_id")
+ file_id = cursor.fetchone()["file_id"]
+
+ insert_filechecksum(file, "md5", file_id, conn)
else:
query = """
@@ -1701,6 +1717,7 @@ def set_populate_file(fileset, fileset_id, conn, detection):
candidate_files[filename.lower()][0],
),
)
+
query = """
INSERT INTO filechecksum (file, checksize, checktype, checksum)
VALUES (%s, %s, %s, %s)
@@ -1714,6 +1731,14 @@ def set_populate_file(fileset, fileset_id, conn, detection):
checksum,
),
)
+
+ add_all_equal_checksums(
+ checksize,
+ checktype,
+ checksum,
+ candidate_files[filename.lower()][0],
+ conn,
+ )
seen_detection_files.add((filename.lower(), file["size"]))
@@ -1745,9 +1770,13 @@ def insert_new_fileset(
if fileset_id:
for file in fileset["rom"]:
insert_file(file, detection, src, conn)
+ file_id = None
+ with conn.cursor() as cursor:
+ cursor.execute("SELECT @file_last AS file_id")
+ file_id = cursor.fetchone()["file_id"]
for key, value in file.items():
if key not in ["name", "size", "size-r", "size-rd", "sha1", "crc"]:
- insert_filechecksum(file, key, conn)
+ insert_filechecksum(file, key, file_id, conn)
return (fileset_id, existing)
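Two things change in this commit: insert_filechecksum now receives an explicit file_id instead of relying on the @file_last session variable, and the "equal checksum" expansion moves into add_all_equal_checksums(), which exploits the fact that a prefix checksum such as md5-5000 equals the full md5 whenever the file is shorter than the prefix. A standalone illustration of that equality (values are made up, not repository code):

import hashlib

def md5_prefix(data, prefix_size=None):
    """md5 of the whole payload, or of just its first prefix_size bytes."""
    chunk = data if prefix_size is None else data[:prefix_size]
    return hashlib.md5(chunk).hexdigest()

small = b"x" * 3000                                   # shorter than the 5000-byte prefix
large = b"x" * 8000                                   # longer than the prefix
assert md5_prefix(small, 5000) == md5_prefix(small)   # prefix covers the whole file
assert md5_prefix(large, 5000) != md5_prefix(large)   # prefix no longer equals the full hash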
Commit: b16986fe7d30028acee0d1939d8037f46d9f152c
https://github.com/scummvm/scummvm-sites/commit/b16986fe7d30028acee0d1939d8037f46d9f152c
Author: ShivangNagta (shivangnag at gmail.com)
Date: 2025-07-18T22:15:19+02:00
Commit Message:
INTEGRITY: Add filtering by platform for set.dat to reduce manual merge.
Changed paths:
db_functions.py
diff --git a/db_functions.py b/db_functions.py
index de74d58..f405f41 100644
--- a/db_functions.py
+++ b/db_functions.py
@@ -1028,6 +1028,12 @@ def set_process(
for fileset_id, candidate_filesets in set_to_candidate_dict.items():
fileset = id_to_fileset_dict[fileset_id]
+
+ # Filter by platform to reduce manual merge
+ candidate_filesets = set_filter_by_platform(
+ fileset["name"], candidate_filesets, conn
+ )
+
(
fully_matched_filesets,
auto_merged_filesets,
@@ -1063,6 +1069,47 @@ def set_process(
create_log(escape_string(category_text), user, escape_string(log_text), conn)
+def set_filter_by_platform(gameid, candidate_filesets, conn):
+ """
+ Return - list(number) : list of fileset ids of filtered candidates.
+ The number of manual merges in case the file size is not present (equal to -1) are too high. So we try to filter by platform extracted from the gameId of the set.dat fileset. We may disable this feature later or keep it optional with a command line argument.
+ """
+ with conn.cursor() as cursor:
+ # e.g. sq2-coco3-1
+ possible_platform_names = gameid.split("-")[1:]
+
+ # Align platform names in set.dat and detection entries
+ for i, platform in enumerate(possible_platform_names):
+ if platform == "win":
+ possible_platform_names[i] = "windows"
+ elif platform == "mac":
+ possible_platform_names[i] = "macintosh"
+
+ filtered_candidate_fileset = []
+
+ for candidate_fileset_id in candidate_filesets:
+ query = """
+ SELECT g.platform
+ FROM fileset fs
+ JOIN game g ON g.id = fs.game
+ WHERE fs.id = %s
+ """
+ cursor.execute(query, (candidate_fileset_id,))
+ candidate_platform = cursor.fetchone()["platform"]
+ if candidate_platform in possible_platform_names:
+ filtered_candidate_fileset.append(candidate_fileset_id)
+
+ if len(filtered_candidate_fileset) != 0:
+ print(len(candidate_filesets), " ", len(filtered_candidate_fileset), "\n")
+
+ # If nothing was filtred, then it is likely, that platform information was not present, so we fallback to original list of candidates.
+ return (
+ candidate_filesets
+ if len(filtered_candidate_fileset) == 0
+ else filtered_candidate_fileset
+ )
+
+
def set_perform_match(
fileset,
src,
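set_filter_by_platform() treats everything after the first '-' in a set.dat game id (e.g. "sq2-coco3-1") as a potential platform token and normalises the short forms to the platform names stored with detection entries. The token handling in isolation, detached from the SQL lookup (the second example id is hypothetical):

PLATFORM_ALIASES = {"win": "windows", "mac": "macintosh"}

def possible_platforms(gameid):
    """Everything after the first '-' is treated as a possible platform hint."""
    return [PLATFORM_ALIASES.get(token, token) for token in gameid.split("-")[1:]]

print(possible_platforms("sq2-coco3-1"))  # ['coco3', '1']
print(possible_platforms("kq5-mac"))      # ['macintosh']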
Commit: 1cfe4abbae5ca92b3aaf25a073b4def452cfe75d
https://github.com/scummvm/scummvm-sites/commit/1cfe4abbae5ca92b3aaf25a073b4def452cfe75d
Author: ShivangNagta (shivangnag at gmail.com)
Date: 2025-07-18T22:15:19+02:00
Commit Message:
INTEGRITY: Avoid adding a fileset as candidate if it was marked as partial in the same run.
Changed paths:
db_functions.py
diff --git a/db_functions.py b/db_functions.py
index f405f41..3b18fd6 100644
--- a/db_functions.py
+++ b/db_functions.py
@@ -1034,6 +1034,16 @@ def set_process(
fileset["name"], candidate_filesets, conn
)
+ for candidate_fileset in candidate_filesets:
+ with conn.cursor() as cursor:
+ cursor.execute(
+ "SELECT id FROM fileset WHERE status = 'current' AND id = %s",
+ (candidate_fileset),
+ )
+ result = cursor.fetchone()
+ if result:
+ candidate_filesets.remove(candidate_fileset)
+
(
fully_matched_filesets,
auto_merged_filesets,
@@ -1056,6 +1066,8 @@ def set_process(
# Final log
with conn.cursor() as cursor:
+ cursor.execute("UPDATE fileset SET status = 'partial' WHERE status = 'current'")
+
cursor.execute(
"SELECT COUNT(fileset) from transactions WHERE `transaction` = %s",
(transaction_id,),
@@ -1099,9 +1111,6 @@ def set_filter_by_platform(gameid, candidate_filesets, conn):
if candidate_platform in possible_platform_names:
filtered_candidate_fileset.append(candidate_fileset_id)
- if len(filtered_candidate_fileset) != 0:
- print(len(candidate_filesets), " ", len(filtered_candidate_fileset), "\n")
-
# If nothing was filtred, then it is likely, that platform information was not present, so we fallback to original list of candidates.
return (
candidate_filesets
@@ -1135,7 +1144,7 @@ def set_perform_match(
)
status = cursor.fetchone()["status"]
if status == "detection":
- update_fileset_status(cursor, matched_fileset_id, "partial")
+ update_fileset_status(cursor, matched_fileset_id, "current")
set_populate_file(fileset, matched_fileset_id, conn, detection)
auto_merged_filesets += 1
if not skiplog:
@@ -1186,7 +1195,7 @@ def set_perform_match(
for candidate_fileset in candidate_filesets:
(is_match, _) = is_full_checksum_match(candidate_fileset, fileset, conn)
if is_match:
- update_fileset_status(cursor, candidate_fileset, "partial")
+ update_fileset_status(cursor, candidate_fileset, "current")
set_populate_file(fileset, candidate_fileset, conn, detection)
auto_merged_filesets += 1
if not skiplog:
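The status changes in this commit form a small lifecycle: a candidate merged during the run is marked 'current', later set.dat entries skip any candidate that is already 'current', and the final log step flips every 'current' fileset back to 'partial'. That is what prevents two entries in the same run from being merged into the same detection fileset. A hedged sketch of that lifecycle with the SQL reduced to the essentials (pymysql-style cursor assumed):

def mark_merged_this_run(cursor, fileset_id):
    # Taken for the rest of this run; excluded from later candidate lists.
    cursor.execute("UPDATE fileset SET status = 'current' WHERE id = %s", (fileset_id,))

def is_taken_this_run(cursor, fileset_id):
    cursor.execute("SELECT 1 FROM fileset WHERE status = 'current' AND id = %s", (fileset_id,))
    return cursor.fetchone() is not None

def finalise_run(cursor):
    # Once the whole set.dat has been processed, the temporary state becomes 'partial'.
    cursor.execute("UPDATE fileset SET status = 'partial' WHERE status = 'current'")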
Commit: 924d1d3adbf1867f1bd3589c0e8808be1ecac350
https://github.com/scummvm/scummvm-sites/commit/924d1d3adbf1867f1bd3589c0e8808be1ecac350
Author: ShivangNagta (shivangnag at gmail.com)
Date: 2025-07-18T22:15:19+02:00
Commit Message:
INTEGRITY: Add additional filtering logic for glk engines
Changed paths:
db_functions.py
diff --git a/db_functions.py b/db_functions.py
index 3b18fd6..8800a8b 100644
--- a/db_functions.py
+++ b/db_functions.py
@@ -965,9 +965,17 @@ def set_process(
if existing:
continue
- candidate_filesets = set_filter_candidate_filesets(
- fileset_id, fileset, transaction_id, conn
- )
+ # Separating out the matching logic for glk engine
+ engine_name = fileset["sourcefile"].split("-")[0]
+
+ if engine_name == "glk":
+ candidate_filesets = set_glk_filter_candidate_filesets(
+ fileset_id, fileset, transaction_id, engine_name, conn
+ )
+ else:
+ candidate_filesets = set_filter_candidate_filesets(
+ fileset_id, fileset, transaction_id, conn
+ )
# Mac files in set.dat are not represented properly and they won't find a candidate fileset for a match, so we can drop them.
if len(candidate_filesets) == 0:
@@ -1288,6 +1296,84 @@ def is_full_checksum_match(candidate_fileset, fileset, conn):
return (len(unmatched_files) == 0, unmatched_files)
+def set_glk_filter_candidate_filesets(
+ fileset_id, fileset, transaction_id, engine_name, conn
+):
+ """
+ Returns a list of candidate filesets for glk engines that can be merged
+ """
+ with conn.cursor() as cursor:
+ # Returns those filesets which have all detection files matching in the set fileset filtered by engine, file name and file size(if not -1) sorted in descending order of matches
+
+ query = """
+ WITH candidate_fileset AS (
+ SELECT fs.id AS fileset_id, f.size
+ FROM file f
+ JOIN fileset fs ON f.fileset = fs.id
+ JOIN game g ON g.id = fs.game
+ JOIN engine e ON e.id = g.engine
+ JOIN transactions t ON t.fileset = fs.id
+ WHERE fs.id != %s
+ AND e.engineid = %s
+ AND f.detection = 1
+ AND t.transaction != %s
+ AND (g.gameid = %s OR (g.gameid != %s AND g.gameid LIKE %s))
+ ),
+ total_detection_files AS (
+ SELECT cf.fileset_id, COUNT(*) AS detection_files_found
+ FROM candidate_fileset cf
+ GROUP BY fileset_id
+ ),
+ set_fileset AS (
+ SELECT size FROM file
+ WHERE fileset = %s
+ ),
+ matched_detection_files AS (
+ SELECT cf.fileset_id, COUNT(*) AS match_files_count
+ FROM candidate_fileset cf
+ JOIN set_fileset sf ON
+ cf.size = sf.size OR cf.size = 0
+ GROUP BY cf.fileset_id
+ ),
+ valid_matched_detection_files AS (
+ SELECT mdf.fileset_id, mdf.match_files_count AS valid_match_files_count
+ FROM matched_detection_files mdf
+ JOIN total_detection_files tdf ON tdf.fileset_id = mdf.fileset_id
+ WHERE tdf.detection_files_found <= mdf.match_files_count
+ ),
+ max_match_count AS (
+ SELECT MAX(valid_match_files_count) AS max_count FROM valid_matched_detection_files
+ )
+ SELECT vmdf.fileset_id
+ FROM valid_matched_detection_files vmdf
+ JOIN total_detection_files tdf ON vmdf.fileset_id = tdf.fileset_id
+ JOIN max_match_count mmc ON vmdf.valid_match_files_count = mmc.max_count
+ """
+
+ gameid_pattern = f"%{fileset['name']}%"
+
+ cursor.execute(
+ query,
+ (
+ fileset_id,
+ engine_name,
+ transaction_id,
+ fileset["name"],
+ fileset["name"],
+ gameid_pattern,
+ fileset_id,
+ ),
+ )
+ rows = cursor.fetchall()
+
+ candidates = []
+ if rows:
+ for row in rows:
+ candidates.append(row["fileset_id"])
+
+ return candidates
+
+
def set_filter_candidate_filesets(fileset_id, fileset, transaction_id, conn):
"""
Returns a list of candidate filesets that can be merged
@@ -1715,6 +1801,13 @@ def set_populate_file(fileset, fileset_id, conn, detection):
for target_file in target_files
}
+ # For glk engines
+ candidate_file_size = {
+ target_file["size"]: target_file["id"] for target_file in target_files
+ }
+
+ engine_name = fileset["sourcefile"].split("-")[0]
+
seen_detection_files = set()
for file in fileset["rom"]:
@@ -1724,13 +1817,16 @@ def set_populate_file(fileset, fileset_id, conn, detection):
filename = os.path.basename(normalised_path(file["name"]))
- if ((filename.lower(), file["size"]) in seen_detection_files) or (
- filename.lower() not in candidate_files
+ if (engine_name == "glk" and file["size"] not in candidate_file_size) and (
+ (filename.lower(), file["size"]) in seen_detection_files
or (
- filename.lower() in candidate_files
- and (
- candidate_files[filename.lower()][1] != -1
- and candidate_files[filename.lower()][1] != file["size"]
+ filename.lower() not in candidate_files
+ or (
+ filename.lower() in candidate_files
+ and (
+ candidate_files[filename.lower()][1] != -1
+ and candidate_files[filename.lower()][1] != file["size"]
+ )
)
)
):
@@ -1764,13 +1860,16 @@ def set_populate_file(fileset, fileset_id, conn, detection):
name = %s
WHERE id = %s
"""
+
# Filtering was by filename, but we are still updating the file with the original filepath.
cursor.execute(
query,
(
file["size"],
normalised_path(file["name"]),
- candidate_files[filename.lower()][0],
+ candidate_files[filename.lower()][0]
+ if engine_name != "glk"
+ else candidate_file_size[file["size"]],
),
)
@@ -1781,7 +1880,9 @@ def set_populate_file(fileset, fileset_id, conn, detection):
cursor.execute(
query,
(
- candidate_files[filename.lower()][0],
+ candidate_files[filename.lower()][0]
+ if engine_name != "glk"
+ else candidate_file_size[file["size"]],
checksize,
checktype,
checksum,
@@ -1792,7 +1893,9 @@ def set_populate_file(fileset, fileset_id, conn, detection):
checksize,
checktype,
checksum,
- candidate_files[filename.lower()][0],
+ candidate_files[filename.lower()][0]
+ if engine_name != "glk"
+ else candidate_file_size[file["size"]],
conn,
)
seen_detection_files.add((filename.lower(), file["size"]))
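Because glk detection entries cannot be matched reliably by file name, set_populate_file() builds a second lookup keyed by file size and uses it whenever the engine is glk; name-based matching stays in place for every other engine. A simplified sketch of that lookup choice (the data at the bottom is illustrative, not from the database):

def pick_target_file_id(engine_name, filename, size, files_by_name, files_by_size):
    """files_by_name: name -> (file_id, size); files_by_size: size -> file_id."""
    if engine_name == "glk":
        return files_by_size.get(size)        # glk: match purely on size
    entry = files_by_name.get(filename.lower())
    return entry[0] if entry else None

files_by_name = {"game.dat": (42, 123456)}
files_by_size = {123456: 42}
print(pick_target_file_id("glk", "ignored", 123456, files_by_name, files_by_size))   # 42
print(pick_target_file_id("sci", "GAME.DAT", 123456, files_by_name, files_by_size))  # 42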
Commit: 9f8befe92be7eea3327128b0dee374b311b8ea26
https://github.com/scummvm/scummvm-sites/commit/9f8befe92be7eea3327128b0dee374b311b8ea26
Author: ShivangNagta (shivangnag at gmail.com)
Date: 2025-07-18T22:15:19+02:00
Commit Message:
INTEGRITY: Add timestamp field in scan.dat and filtering support via modification time
Changed paths:
compute_hash.py
diff --git a/compute_hash.py b/compute_hash.py
index 0067cfd..b5c743a 100644
--- a/compute_hash.py
+++ b/compute_hash.py
@@ -4,6 +4,8 @@ import argparse
import struct
import sys
from enum import Enum
+from datetime import datetime, date
+from collections import defaultdict
class FileType(Enum):
NON_MAC = "non_mac"
@@ -154,7 +156,6 @@ def is_actual_resource_fork_mac(filepath):
""" Returns boolean, checking the actual mac fork if it exists. """
resource_fork_path = os.path.join(filepath, "..namedfork", "rsrc")
- print(resource_fork_path)
return os.path.exists(resource_fork_path)
def is_appledouble(file_byte_stream):
@@ -505,7 +506,7 @@ def file_filter(files):
for file in to_be_deleted:
del files[file]
-def compute_hash_of_dirs(root_directory, depth, size=0, alg="md5"):
+def compute_hash_of_dirs(root_directory, depth, size=0, limit_timestamps_date=None, alg="md5"):
""" Return dictionary containing checksums of all files in directory """
res = []
@@ -518,10 +519,14 @@ def compute_hash_of_dirs(root_directory, depth, size=0, alg="md5"):
for root, _, contents in os.walk(directory):
files.extend([os.path.join(root, f) for f in contents])
+ # Filter out the files based on user input date - limit_timestamps_date
+ filtered_file_map = filter_files_by_timestamp(files, limit_timestamp_date)
+
# Produce filetype and filename(name to be used in game entry) for each file
- for filepath in files:
+ for filepath in filtered_file_map:
file_collection[filepath] = file_classification(filepath)
+
# Remove extra entries of macfiles to avoid extra checksum calculation in form of non mac files
# Checksum for both the forks are calculated using a single file, so other files should be removed from the collection
file_filter(file_collection)
@@ -538,11 +543,45 @@ def compute_hash_of_dirs(root_directory, depth, size=0, alg="md5"):
relative_dir = os.path.dirname(os.path.dirname(relative_path))
relative_path = os.path.join(relative_dir, base_name)
- hash_of_dir[relative_path] = file_checksum(file_path, alg, size, file_info)
+ hash_of_dir[relative_path] = file_checksum(file_path, alg, size, file_info) + (filtered_file_map[file_path],)
res.append(hash_of_dir)
return res
+
+def validate_date(date_str):
+ """
+ Confirms if the user provided timestamp is in a valid format.
+ Returns the date as a datetime object.
+ """
+ formats = ["%Y-%m-%d", "%Y-%m", "%Y"]
+ for fmt in formats:
+ try:
+ return datetime.strptime(date_str, fmt).date()
+ except ValueError:
+ continue
+ raise ValueError("Invalid date format. Use YYYY, YYYY-MM, or YYYY-MM-DD")
+
+
+def filter_files_by_timestamp(files, limit_timestamps_date):
+ """
+ Removes the files those were modified after a certain timestamp provided by the user.
+ The files those were modified today are kept.
+ Returns filtered map with filepath and its modification time
+ """
+
+ filtered_file_map = defaultdict(str)
+ user_date = validate_date(limit_timestamps_date)
+ today = date.today()
+
+ for filepath in files:
+ mtime = datetime.fromtimestamp(os.path.getmtime(filepath)).date()
+ if limit_timestamps_date is None or (limit_timestamps_date is not None and (mtime <= user_date or mtime == today)):
+ filtered_file_map[filepath] = str(mtime)
+
+ return filtered_file_map
+
+
def create_dat_file(hash_of_dirs, path, checksum_size=0):
with open(f"{os.path.basename(path)}.dat", "w") as file:
# Header
@@ -556,8 +595,8 @@ def create_dat_file(hash_of_dirs, path, checksum_size=0):
# Game files
for hash_of_dir in hash_of_dirs:
file.write("game (\n")
- for filename, (hashes, filesize) in hash_of_dir.items():
- data = f"name \"{filename}\" size {filesize}"
+ for filename, (hashes, filesize, timestamp) in hash_of_dir.items():
+ data = f"name \"{filename}\" size {filesize} timestamp {timestamp}"
for key, value in hashes:
data += f" {key} {value}"
@@ -579,10 +618,13 @@ parser.add_argument("--depth",
help="Depth from root to game directories")
parser.add_argument("--size",
help="Use first n bytes of file to calculate checksum")
+parser.add_argument("--limit-timestamps",
+ help="Format - YYYY-MM-DD or YYYY-MM or YYYY. Filters out the files those were modified after the given timestamp. Note that if the modification time is today, it would not be filtered out.")
args = parser.parse_args()
path = os.path.abspath(args.directory) if args.directory else os.getcwd()
depth = int(args.depth) if args.depth else 0
checksum_size = int(args.size) if args.size else 0
+limit_timestamp_date = str(args.limit_timestamps) if args.limit_timestamps else None
create_dat_file(compute_hash_of_dirs(
- path, depth, checksum_size), path, checksum_size)
+ path, depth, checksum_size, limit_timestamp_date), path, checksum_size)
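The new --limit-timestamps option accepts YYYY, YYYY-MM or YYYY-MM-DD and keeps a file only if its modification date is on or before the cut-off, or if it was modified today (so freshly extracted archives are not filtered away). A compact, self-contained version of that filter (the path in the comment is a placeholder):

import os
from datetime import datetime, date

def parse_limit(date_str):
    for fmt in ("%Y-%m-%d", "%Y-%m", "%Y"):
        try:
            return datetime.strptime(date_str, fmt).date()
        except ValueError:
            continue
    raise ValueError("Invalid date format. Use YYYY, YYYY-MM, or YYYY-MM-DD")

def keep_file(path, limit=None):
    if limit is None:
        return True
    mtime = datetime.fromtimestamp(os.path.getmtime(path)).date()
    return mtime <= parse_limit(limit) or mtime == date.today()

# keep_file("/games/sq2/DATA.000", "2001-06")  # placeholder path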
Commit: e7970559c03ffe23a85820d31a0ad7338279871c
https://github.com/scummvm/scummvm-sites/commit/e7970559c03ffe23a85820d31a0ad7338279871c
Author: ShivangNagta (shivangnag at gmail.com)
Date: 2025-07-18T22:15:19+02:00
Commit Message:
INTEGRITY: Add all size variants to scan.dat - size, size-r and size-rd.
Changed paths:
compute_hash.py
diff --git a/compute_hash.py b/compute_hash.py
index b5c743a..3b0e155 100644
--- a/compute_hash.py
+++ b/compute_hash.py
@@ -179,7 +179,7 @@ def is_appledouble(file_byte_stream):
return True
def macbin_get_resfork_data(file_byte_stream):
- """ Returns the resource fork's data section as bytes of a macbinary file as well as its size """
+ """ Returns the resource fork's data section as bytes, data fork size (size), resource fork size (size-r) and data section of resource fork size (size-rd) of a macbinary file """
if not file_byte_stream:
return file_byte_stream
@@ -189,10 +189,10 @@ def macbin_get_resfork_data(file_byte_stream):
(rsrclen,) = struct.unpack(">I", file_byte_stream[0x57:0x5B])
resoure_fork_offset = 128 + datalen_padded
- data_offset = int.from_bytes(file_byte_stream[resoure_fork_offset+0 : resoure_fork_offset+4])
- data_length = int.from_bytes(file_byte_stream[resoure_fork_offset+8 : resoure_fork_offset+12])
+ rd_offset = int.from_bytes(file_byte_stream[resoure_fork_offset+0 : resoure_fork_offset+4])
+ rd_length = int.from_bytes(file_byte_stream[resoure_fork_offset+8 : resoure_fork_offset+12])
- return (file_byte_stream[resoure_fork_offset + data_offset: resoure_fork_offset + data_offset + data_length], data_length)
+ return (file_byte_stream[resoure_fork_offset + rd_offset: resoure_fork_offset + rd_offset + rd_length], datalen, rsrclen, rd_length)
def macbin_get_datafork(file_byte_stream):
if not file_byte_stream:
@@ -222,7 +222,7 @@ def is_appledouble(file_byte_stream):
return True
def appledouble_get_resfork_data(file_byte_stream):
- """ Returns the resource fork's data section as bytes of an appledouble file as well as its size """
+ """ Returns the resource fork's data section as bytes, size of resource fork (size-r) and size of data section of resource fork (size-rd) of an appledouble file"""
entry_count = read_be_16(file_byte_stream[24:])
for entry in range(entry_count):
@@ -233,13 +233,13 @@ def appledouble_get_resfork_data(file_byte_stream):
if id == 2:
resource_fork_stream = file_byte_stream[offset:offset+length]
- data_offset = int.from_bytes(resource_fork_stream[0:4])
- data_length = int.from_bytes(resource_fork_stream[8:12])
+ rd_offset = int.from_bytes(resource_fork_stream[0:4])
+ rd_length = int.from_bytes(resource_fork_stream[8:12])
- return (resource_fork_stream[data_offset: data_offset+data_length], data_length)
+ return (resource_fork_stream[rd_offset: rd_offset+rd_length], length, rd_length)
def appledouble_get_datafork(filepath, fileinfo):
- """ Returns data fork's content as bytes of appledouble file if found, otherwise empty byte string """
+ """ Returns data fork's content as bytes and size of data fork of an appledouble file."""
try:
index = filepath.index("__MACOSX")
except ValueError:
@@ -253,50 +253,54 @@ def appledouble_get_datafork(filepath, fileinfo):
try:
with open(data_fork_path, "rb") as f:
- return f.read()
+ data = f.read()
+ return (data, len(data))
except (FileNotFoundError, IsADirectoryError):
return b''
def raw_rsrc_get_datafork(filepath):
- """ Returns the data fork's content as bytes corresponding to raw rsrc file. """
+ """ Returns the data fork's content as bytes and size of the data fork corresponding to raw rsrc file. """
try:
with open(filepath[:-5]+".data", "rb") as f:
- return f.read()
+ data = f.read()
+ return (data, len(data))
except (FileNotFoundError, IsADirectoryError):
return b''
def raw_rsrc_get_resource_fork_data(filepath):
- """ Returns the resource fork's data section as bytes of a raw rsrc file as well as its size """
+ """ Returns the resource fork's data section as bytes, size of resource fork (size-r) and size of data section of resource fork (size-rd) of a raw rsrc file."""
with open(filepath, "rb") as f:
resource_fork_stream = f.read()
- data_offset = int.from_bytes(resource_fork_stream[0:4])
- data_length = int.from_bytes(resource_fork_stream[8:12])
+ resource_fork_len = len(resource_fork_stream)
+ rd_offset = int.from_bytes(resource_fork_stream[0:4])
+ rd_length = int.from_bytes(resource_fork_stream[8:12])
- return (resource_fork_stream[data_offset: data_offset+data_length], data_length)
+ return (resource_fork_stream[rd_offset: rd_offset+rd_length], resource_fork_len, rd_length)
def actual_mac_fork_get_data_fork(filepath):
- """ Returns the data fork's content as bytes if the actual mac fork exists """
+ """ Returns the data fork's content as bytes and its size if the actual mac fork exists """
try:
with open(filepath, "rb") as f:
- return f.read()
+ data = f.read()
+ return (data, len(data))
except (FileNotFoundError, IsADirectoryError):
return b''
def actual_mac_fork_get_resource_fork_data(filepath):
- """ Returns the resource fork's data section as bytes of the actual mac fork as well as its size """
+ """ Returns the resource fork's data section as bytes, size of resource fork (size-r) and size of data section of resource fork (size-rd) of the actual mac fork."""
resource_fork_path = os.path.join(filepath, "..namedfork", "rsrc")
with open(resource_fork_path, "rb") as f:
resource_fork_stream = f.read()
- data_offset = int.from_bytes(resource_fork_stream[0:4])
- data_length = int.from_bytes(resource_fork_stream[8:12])
+ resource_fork_len = len(resource_fork_stream)
+ rd_offset = int.from_bytes(resource_fork_stream[0:4])
+ rd_length = int.from_bytes(resource_fork_stream[8:12])
- return (resource_fork_stream[data_offset: data_offset+data_length], data_length)
+ return (resource_fork_stream[rd_offset: rd_offset+rd_length], resource_fork_len, rd_length)
-def file_checksum(filepath, alg, size, file_info):
- cur_file_size = 0
+def file_checksum(filepath, alg, custom_checksum_size, file_info):
with open(filepath, "rb") as f:
if file_info[0] == FileType.NON_MAC:
- return (create_checksum_pairs(checksum(f, alg, size, filepath), alg, size), filesize(filepath))
+ return (create_checksum_pairs(checksum(f, alg, custom_checksum_size, filepath), alg, custom_checksum_size), filesize(filepath), 0, 0)
# Processing mac files
res = []
@@ -304,29 +308,33 @@ def file_checksum(filepath, alg, size, file_info):
datafork = b''
file_data = f.read()
+ size = 0
+ size_r = 0
+ size_rd = 0
+
if file_info[0] == FileType.MAC_BINARY:
- (resfork, cur_file_size) = macbin_get_resfork_data(file_data)
+ (resfork, size, size_r, size_rd) = macbin_get_resfork_data(file_data)
datafork = macbin_get_datafork(file_data)
elif file_info[0] in {FileType.APPLE_DOUBLE_DOT_, FileType.APPLE_DOUBLE_RSRC, FileType.APPLE_DOUBLE_MACOSX}:
- (resfork, cur_file_size) = appledouble_get_resfork_data(file_data)
- datafork = appledouble_get_datafork(filepath, file_info)
+ (resfork, size_r, size_rd) = appledouble_get_resfork_data(file_data)
+ (datafork, size) = appledouble_get_datafork(filepath, file_info)
elif file_info[0] == FileType.RAW_RSRC:
- (resfork, cur_file_size) = raw_rsrc_get_resource_fork_data(filepath)
- datafork = raw_rsrc_get_datafork(filepath)
+ (resfork, size_r, size_rd) = raw_rsrc_get_resource_fork_data(filepath)
+ datafork, size = raw_rsrc_get_datafork(filepath)
elif file_info[0] == FileType.ACTUAL_FORK_MAC:
- (resfork, cur_file_size) = actual_mac_fork_get_resource_fork_data(filepath)
- datafork = actual_mac_fork_get_data_fork(filepath)
+ (resfork, size_r, size_rd) = actual_mac_fork_get_resource_fork_data(filepath)
+ (datafork, size) = actual_mac_fork_get_data_fork(filepath)
- hashes = checksum(resfork, alg, size, filepath)
+ hashes = checksum(resfork, alg, custom_checksum_size, filepath)
prefix = 'r'
if len(resfork):
- res.extend(create_checksum_pairs(hashes, alg, size, prefix))
+ res.extend(create_checksum_pairs(hashes, alg, custom_checksum_size, prefix))
- hashes = checksum(datafork, alg, size, filepath)
+ hashes = checksum(datafork, alg, custom_checksum_size, filepath)
prefix = 'd'
- res.extend(create_checksum_pairs(hashes, alg, size, prefix))
+ res.extend(create_checksum_pairs(hashes, alg, custom_checksum_size, prefix))
- return (res, cur_file_size)
+ return (res, size, size_r, size_rd)
def create_checksum_pairs(hashes, alg, size, prefix=None):
res = []
@@ -571,7 +579,8 @@ def filter_files_by_timestamp(files, limit_timestamps_date):
"""
filtered_file_map = defaultdict(str)
- user_date = validate_date(limit_timestamps_date)
+ if limit_timestamp_date is not None:
+ user_date = validate_date(limit_timestamps_date)
today = date.today()
for filepath in files:
@@ -595,8 +604,8 @@ def create_dat_file(hash_of_dirs, path, checksum_size=0):
# Game files
for hash_of_dir in hash_of_dirs:
file.write("game (\n")
- for filename, (hashes, filesize, timestamp) in hash_of_dir.items():
- data = f"name \"{filename}\" size {filesize} timestamp {timestamp}"
+ for filename, (hashes, size, size_r, size_rd, timestamp) in hash_of_dir.items():
+ data = f"name \"{filename}\" size {size} size-r {size_r} size-rd {size_rd} timestamp {timestamp}"
for key, value in hashes:
data += f" {key} {value}"
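The three sizes written per file are the data fork length (size), the full resource fork length (size-r) and the length of the resource fork's data section (size-rd). For MacBinary input all three come from fixed header offsets plus the first fields of the resource fork; a minimal sketch of that arithmetic using the offsets shown in the diff (no validation, big-endian throughout):

import struct

def macbinary_sizes(blob):
    """Return (size, size_r, size_rd) for a MacBinary byte string."""
    (datalen,) = struct.unpack(">I", blob[0x53:0x57])   # data fork length
    (rsrclen,) = struct.unpack(">I", blob[0x57:0x5B])   # resource fork length
    datalen_padded = (datalen + 127) & ~127             # data fork is padded to a 128-byte boundary
    rsrc = blob[128 + datalen_padded:]                  # resource fork follows header + padded data fork
    rd_length = int.from_bytes(rsrc[8:12], "big")       # resource fork's data-section length
    return datalen, rsrclen, rd_length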
Commit: a2bfb8dd657942da7a54dc1f6b08227dd2a4f65e
https://github.com/scummvm/scummvm-sites/commit/a2bfb8dd657942da7a54dc1f6b08227dd2a4f65e
Author: ShivangNagta (shivangnag at gmail.com)
Date: 2025-07-18T22:15:19+02:00
Commit Message:
INTEGRITY: Fix clear database hang issue. Now the database is dropped and recreated.
Changed paths:
db_functions.py
fileset.py
schema.py
diff --git a/db_functions.py b/db_functions.py
index 8800a8b..393906b 100644
--- a/db_functions.py
+++ b/db_functions.py
@@ -32,6 +32,24 @@ def db_connect():
return conn
+def db_connect_root():
+ base_dir = os.path.dirname(os.path.abspath(__file__))
+ config_path = os.path.join(base_dir, "mysql_config.json")
+ with open(config_path) as f:
+ mysql_cred = json.load(f)
+
+ conn = pymysql.connect(
+ host=mysql_cred["servername"],
+ user=mysql_cred["username"],
+ password=mysql_cred["password"],
+ charset="utf8mb4",
+ cursorclass=pymysql.cursors.DictCursor,
+ autocommit=True,
+ )
+
+ return (conn, mysql_cred["dbname"])
+
+
def get_checksum_props(checkcode, checksum):
checksize = 0
checktype = checkcode
diff --git a/fileset.py b/fileset.py
index 9b9dc93..3f7bc40 100644
--- a/fileset.py
+++ b/fileset.py
@@ -23,8 +23,10 @@ from db_functions import (
user_integrity_check,
db_connect,
create_log,
+ db_connect_root,
)
from collections import defaultdict
+from schema import init_database
app = Flask(__name__)
@@ -79,21 +81,13 @@ def index():
@app.route("/clear_database", methods=["POST"])
def clear_database():
try:
- conn = db_connect()
+ (conn, db_name) = db_connect_root()
with conn.cursor() as cursor:
- cursor.execute("SET FOREIGN_KEY_CHECKS = 0;")
- cursor.execute("TRUNCATE TABLE filechecksum")
- cursor.execute("TRUNCATE TABLE history")
- cursor.execute("TRUNCATE TABLE transactions")
- cursor.execute("TRUNCATE TABLE queue")
- cursor.execute("TRUNCATE TABLE file")
- cursor.execute("TRUNCATE TABLE fileset")
- cursor.execute("TRUNCATE TABLE game")
- cursor.execute("TRUNCATE TABLE engine")
- cursor.execute("TRUNCATE TABLE log")
- cursor.execute("SET FOREIGN_KEY_CHECKS = 1;")
+ cursor.execute(f"DROP DATABASE IF EXISTS {db_name}")
conn.commit()
- print("DATABASE CLEARED")
+ print("DATABASE DROPPED")
+ init_database()
+ print("DATABASE INITIALISED")
except Exception as e:
print(f"Error clearing database: {e}")
finally:
diff --git a/schema.py b/schema.py
index 2ebfeeb..826be7b 100644
--- a/schema.py
+++ b/schema.py
@@ -5,302 +5,310 @@ import string
from datetime import datetime
import os
-# Load MySQL credentials
-base_dir = os.path.dirname(os.path.abspath(__file__))
-config_path = os.path.join(base_dir, "mysql_config.json")
-with open(config_path) as f:
- mysql_cred = json.load(f)
-
-servername = mysql_cred["servername"]
-username = mysql_cred["username"]
-password = mysql_cred["password"]
-dbname = mysql_cred["dbname"]
-
-# Create connection
-conn = pymysql.connect(
- host=servername,
- user=username,
- password=password,
- charset="utf8mb4",
- cursorclass=pymysql.cursors.DictCursor,
- autocommit=False,
-)
-
-# Check connection
-if conn is None:
- print("Error connecting to MySQL")
- exit(1)
-
-cursor = conn.cursor()
-
-# Create database
-sql = f"CREATE DATABASE IF NOT EXISTS {dbname}"
-cursor.execute(sql)
-
-# Use database
-cursor.execute(f"USE {dbname}")
-
-# Create tables
-tables = {
- "engine": """
- CREATE TABLE IF NOT EXISTS engine (
- id INT AUTO_INCREMENT PRIMARY KEY,
- name VARCHAR(200),
- engineid VARCHAR(100) NOT NULL
- )
- """,
- "game": """
- CREATE TABLE IF NOT EXISTS game (
- id INT AUTO_INCREMENT PRIMARY KEY,
- name VARCHAR(200),
- engine INT NOT NULL,
- gameid VARCHAR(100) NOT NULL,
- extra VARCHAR(200),
- platform VARCHAR(30),
- language VARCHAR(10),
- FOREIGN KEY (engine) REFERENCES engine(id)
- )
- """,
- "fileset": """
- CREATE TABLE IF NOT EXISTS fileset (
- id INT AUTO_INCREMENT PRIMARY KEY,
- game INT,
- status VARCHAR(20),
- src VARCHAR(20),
- `key` VARCHAR(64),
- `megakey` VARCHAR(64),
- `delete` BOOLEAN DEFAULT FALSE NOT NULL,
- `timestamp` TIMESTAMP NOT NULL,
- detection_size INT,
- FOREIGN KEY (game) REFERENCES game(id)
- )
- """,
- "file": """
- CREATE TABLE IF NOT EXISTS file (
- id INT AUTO_INCREMENT PRIMARY KEY,
- name VARCHAR(200) NOT NULL,
- size BIGINT NOT NULL,
- checksum VARCHAR(64) NOT NULL,
- fileset INT NOT NULL,
- detection BOOLEAN NOT NULL,
- FOREIGN KEY (fileset) REFERENCES fileset(id) ON DELETE CASCADE
- )
- """,
- "filechecksum": """
- CREATE TABLE IF NOT EXISTS filechecksum (
- id INT AUTO_INCREMENT PRIMARY KEY,
- file INT NOT NULL,
- checksize VARCHAR(10) NOT NULL,
- checktype VARCHAR(10) NOT NULL,
- checksum VARCHAR(64) NOT NULL,
- FOREIGN KEY (file) REFERENCES file(id) ON DELETE CASCADE
- )
- """,
- "queue": """
- CREATE TABLE IF NOT EXISTS queue (
- id INT AUTO_INCREMENT PRIMARY KEY,
- time TIMESTAMP NOT NULL,
- notes varchar(300),
- fileset INT,
- userid INT NOT NULL,
- commit VARCHAR(64) NOT NULL,
- FOREIGN KEY (fileset) REFERENCES fileset(id)
- )
- """,
- "log": """
- CREATE TABLE IF NOT EXISTS log (
- id INT AUTO_INCREMENT PRIMARY KEY,
- `timestamp` TIMESTAMP NOT NULL,
- category VARCHAR(100) NOT NULL,
- user VARCHAR(100) NOT NULL,
- `text` varchar(300)
- )
- """,
- "history": """
- CREATE TABLE IF NOT EXISTS history (
- id INT AUTO_INCREMENT PRIMARY KEY,
- `timestamp` TIMESTAMP NOT NULL,
- fileset INT NOT NULL,
- oldfileset INT NOT NULL,
- log INT
- )
- """,
- "transactions": """
- CREATE TABLE IF NOT EXISTS transactions (
- id INT AUTO_INCREMENT PRIMARY KEY,
- `transaction` INT NOT NULL,
- fileset INT NOT NULL
- )
- """,
- "possible_merges": """
- CREATE TABLE IF NOT EXISTS possible_merges (
- id INT AUTO_INCREMENT PRIMARY KEY,
- child_fileset INT,
- parent_fileset INT,
- FOREIGN KEY (child_fileset) REFERENCES fileset(id) ON DELETE CASCADE,
- FOREIGN KEY (parent_fileset) REFERENCES fileset(id) ON DELETE CASCADE
- )
- """,
-}
-for table, definition in tables.items():
- try:
- cursor.execute(definition)
- print(f"Table '{table}' created successfully")
- except pymysql.Error as err:
- print(f"Error creating '{table}' table: {err}")
-
-# Create indices
-indices = {
- "detection": "CREATE INDEX detection ON file (detection)",
- "checksum": "CREATE INDEX checksum ON filechecksum (checksum)",
- "engineid": "CREATE INDEX engineid ON engine (engineid)",
- "key": "CREATE INDEX fileset_key ON fileset (`key`)",
- "status": "CREATE INDEX status ON fileset (status)",
- "fileset": "CREATE INDEX fileset ON history (fileset)",
- "file_name_size": "CREATE INDEX file_name_size ON file (name, size)",
- "file_fileset_detection": "CREATE INDEX file_fileset_detection ON file (fileset, detection)",
-}
-
-try:
- cursor.execute("ALTER TABLE file ADD COLUMN detection_type VARCHAR(20);")
-except Exception:
- # if aleady exists, change the length of the column
- cursor.execute("ALTER TABLE file MODIFY COLUMN detection_type VARCHAR(20);")
-
-try:
- cursor.execute("ALTER TABLE file ADD COLUMN `timestamp` TIMESTAMP NOT NULL;")
-except Exception:
- # if aleady exists, change the length of the column
- cursor.execute("ALTER TABLE file MODIFY COLUMN `timestamp` TIMESTAMP NOT NULL;")
-
-try:
- cursor.execute("ALTER TABLE fileset ADD COLUMN `user_count` INT;")
-except Exception:
- # if aleady exists, change the length of the column
- cursor.execute("ALTER TABLE fileset MODIFY COLUMN `user_count` INT;")
-
-try:
- cursor.execute("ALTER TABLE file ADD COLUMN punycode_name VARCHAR(200);")
-except Exception:
- cursor.execute("ALTER TABLE file MODIFY COLUMN punycode_name VARCHAR(200);")
-
-try:
- cursor.execute(
- "ALTER TABLE file ADD COLUMN encoding_type VARCHAR(20) DEFAULT 'UTF-8';"
- )
-except Exception:
- cursor.execute(
- "ALTER TABLE file MODIFY COLUMN encoding_type VARCHAR(20) DEFAULT 'UTF-8';"
+def init_database():
+ # Load MySQL credentials
+ base_dir = os.path.dirname(os.path.abspath(__file__))
+ config_path = os.path.join(base_dir, "mysql_config.json")
+ with open(config_path) as f:
+ mysql_cred = json.load(f)
+
+ servername = mysql_cred["servername"]
+ username = mysql_cred["username"]
+ password = mysql_cred["password"]
+ dbname = mysql_cred["dbname"]
+
+ # Create connection
+ conn = pymysql.connect(
+ host=servername,
+ user=username,
+ password=password,
+ charset="utf8mb4",
+ cursorclass=pymysql.cursors.DictCursor,
+ autocommit=False,
)
-try:
- cursor.execute(
- "ALTER TABLE file ADD COLUMN `size-r` BIGINT DEFAULT 0, ADD COLUMN `size-rd` BIGINT DEFAULT 0;"
- )
-except Exception:
- cursor.execute(
- "ALTER TABLE file MODIFY COLUMN `size-r` BIGINT DEFAULT 0, MODIFY COLUMN `size-rd` BIGINT DEFAULT 0;"
- )
-try:
- cursor.execute("ALTER TABLE log ADD COLUMN `text` varchar(1000);")
-except Exception:
- cursor.execute("ALTER TABLE log MODIFY COLUMN `text` varchar(1000);")
+ # Check connection
+ if conn is None:
+ print("Error connecting to MySQL")
+ exit(1)
+
+ cursor = conn.cursor()
+
+ # Create database
+ sql = f"CREATE DATABASE IF NOT EXISTS {dbname}"
+ cursor.execute(sql)
+
+ # Use database
+ cursor.execute(f"USE {dbname}")
+
+ # Create tables
+ tables = {
+ "engine": """
+ CREATE TABLE IF NOT EXISTS engine (
+ id INT AUTO_INCREMENT PRIMARY KEY,
+ name VARCHAR(200),
+ engineid VARCHAR(100) NOT NULL
+ )
+ """,
+ "game": """
+ CREATE TABLE IF NOT EXISTS game (
+ id INT AUTO_INCREMENT PRIMARY KEY,
+ name VARCHAR(200),
+ engine INT NOT NULL,
+ gameid VARCHAR(100) NOT NULL,
+ extra VARCHAR(200),
+ platform VARCHAR(30),
+ language VARCHAR(10),
+ FOREIGN KEY (engine) REFERENCES engine(id)
+ )
+ """,
+ "fileset": """
+ CREATE TABLE IF NOT EXISTS fileset (
+ id INT AUTO_INCREMENT PRIMARY KEY,
+ game INT,
+ status VARCHAR(20),
+ src VARCHAR(20),
+ `key` VARCHAR(64),
+ `megakey` VARCHAR(64),
+ `delete` BOOLEAN DEFAULT FALSE NOT NULL,
+ `timestamp` TIMESTAMP NOT NULL,
+ detection_size INT,
+ FOREIGN KEY (game) REFERENCES game(id)
+ )
+ """,
+ "file": """
+ CREATE TABLE IF NOT EXISTS file (
+ id INT AUTO_INCREMENT PRIMARY KEY,
+ name VARCHAR(200) NOT NULL,
+ size BIGINT NOT NULL,
+ checksum VARCHAR(64) NOT NULL,
+ fileset INT NOT NULL,
+ detection BOOLEAN NOT NULL,
+ FOREIGN KEY (fileset) REFERENCES fileset(id) ON DELETE CASCADE
+ )
+ """,
+ "filechecksum": """
+ CREATE TABLE IF NOT EXISTS filechecksum (
+ id INT AUTO_INCREMENT PRIMARY KEY,
+ file INT NOT NULL,
+ checksize VARCHAR(10) NOT NULL,
+ checktype VARCHAR(10) NOT NULL,
+ checksum VARCHAR(64) NOT NULL,
+ FOREIGN KEY (file) REFERENCES file(id) ON DELETE CASCADE
+ )
+ """,
+ "queue": """
+ CREATE TABLE IF NOT EXISTS queue (
+ id INT AUTO_INCREMENT PRIMARY KEY,
+ time TIMESTAMP NOT NULL,
+ notes varchar(300),
+ fileset INT,
+ userid INT NOT NULL,
+ commit VARCHAR(64) NOT NULL,
+ FOREIGN KEY (fileset) REFERENCES fileset(id)
+ )
+ """,
+ "log": """
+ CREATE TABLE IF NOT EXISTS log (
+ id INT AUTO_INCREMENT PRIMARY KEY,
+ `timestamp` TIMESTAMP NOT NULL,
+ category VARCHAR(100) NOT NULL,
+ user VARCHAR(100) NOT NULL,
+ `text` varchar(300)
+ )
+ """,
+ "history": """
+ CREATE TABLE IF NOT EXISTS history (
+ id INT AUTO_INCREMENT PRIMARY KEY,
+ `timestamp` TIMESTAMP NOT NULL,
+ fileset INT NOT NULL,
+ oldfileset INT NOT NULL,
+ log INT
+ )
+ """,
+ "transactions": """
+ CREATE TABLE IF NOT EXISTS transactions (
+ id INT AUTO_INCREMENT PRIMARY KEY,
+ `transaction` INT NOT NULL,
+ fileset INT NOT NULL
+ )
+ """,
+ "possible_merges": """
+ CREATE TABLE IF NOT EXISTS possible_merges (
+ id INT AUTO_INCREMENT PRIMARY KEY,
+ child_fileset INT,
+ parent_fileset INT,
+ FOREIGN KEY (child_fileset) REFERENCES fileset(id) ON DELETE CASCADE,
+ FOREIGN KEY (parent_fileset) REFERENCES fileset(id) ON DELETE CASCADE
+ )
+ """,
+ }
+
+ for table, definition in tables.items():
+ try:
+ cursor.execute(definition)
+ print(f"Table '{table}' created successfully")
+ except pymysql.Error as err:
+ print(f"Error creating '{table}' table: {err}")
+
+ # Create indices
+ indices = {
+ "detection": "CREATE INDEX detection ON file (detection)",
+ "checksum": "CREATE INDEX checksum ON filechecksum (checksum)",
+ "engineid": "CREATE INDEX engineid ON engine (engineid)",
+ "key": "CREATE INDEX fileset_key ON fileset (`key`)",
+ "status": "CREATE INDEX status ON fileset (status)",
+ "fileset": "CREATE INDEX fileset ON history (fileset)",
+ "file_name_size": "CREATE INDEX file_name_size ON file (name, size)",
+ "file_fileset_detection": "CREATE INDEX file_fileset_detection ON file (fileset, detection)",
+ }
-
-for index, definition in indices.items():
try:
- cursor.execute(definition)
- print(f"Created index for '{index}'")
- except pymysql.Error as err:
- print(f"Error creating index for '{index}': {err}")
-
-
-# Insert random data into tables
-def random_string(length=10):
- return "".join(random.choices(string.ascii_letters + string.digits, k=length))
-
-
-def insert_random_data():
- for _ in range(1000):
- # Insert data into engine
- cursor.execute(
- "INSERT INTO engine (name, engineid) VALUES (%s, %s)",
- (random_string(), random_string()),
- )
+ cursor.execute("ALTER TABLE file ADD COLUMN detection_type VARCHAR(20);")
+ except Exception:
+ # if aleady exists, change the length of the column
+ cursor.execute("ALTER TABLE file MODIFY COLUMN detection_type VARCHAR(20);")
- # Insert data into game
- cursor.execute(
- "INSERT INTO game (name, engine, gameid, extra, platform, language) VALUES (%s, %s, %s, %s, %s, %s)",
- (
- random_string(),
- 1,
- random_string(),
- random_string(),
- random_string(),
- random_string(),
- ),
- )
-
- # Insert data into fileset
- cursor.execute(
- "INSERT INTO fileset (game, status, src, `key`, `megakey`, `timestamp`, detection_size) VALUES (%s, %s, %s, %s, %s, %s, %s)",
- (
- 1,
- "user",
- random_string(),
- random_string(),
- random_string(),
- datetime.now(),
- random.randint(1, 100),
- ),
- )
+ try:
+ cursor.execute("ALTER TABLE file ADD COLUMN `timestamp` TIMESTAMP NOT NULL;")
+ except Exception:
+ # if aleady exists, change the length of the column
+ cursor.execute("ALTER TABLE file MODIFY COLUMN `timestamp` TIMESTAMP NOT NULL;")
- # Insert data into file
- cursor.execute(
- "INSERT INTO file (name, size, checksum, fileset, detection) VALUES (%s, %s, %s, %s, %s)",
- (random_string(), random.randint(1000, 10000), random_string(), 1, True),
- )
+ try:
+ cursor.execute("ALTER TABLE fileset ADD COLUMN `user_count` INT;")
+ except Exception:
+ # if aleady exists, change the length of the column
+ cursor.execute("ALTER TABLE fileset MODIFY COLUMN `user_count` INT;")
- # Insert data into filechecksum
- cursor.execute(
- "INSERT INTO filechecksum (file, checksize, checktype, checksum) VALUES (%s, %s, %s, %s)",
- (1, random_string(), random_string(), random_string()),
- )
+ try:
+ cursor.execute("ALTER TABLE file ADD COLUMN punycode_name VARCHAR(200);")
+ except Exception:
+ cursor.execute("ALTER TABLE file MODIFY COLUMN punycode_name VARCHAR(200);")
- # Insert data into queue
+ try:
cursor.execute(
- "INSERT INTO queue (time, notes, fileset, userid, commit) VALUES (%s, %s, %s, %s, %s)",
- (
- datetime.now(),
- random_string(),
- 1,
- random.randint(1, 100),
- random_string(),
- ),
+ "ALTER TABLE file ADD COLUMN encoding_type VARCHAR(20) DEFAULT 'UTF-8';"
)
-
- # Insert data into log
+ except Exception:
cursor.execute(
- "INSERT INTO log (`timestamp`, category, user, `text`) VALUES (%s, %s, %s, %s)",
- (datetime.now(), random_string(), random_string(), random_string()),
+ "ALTER TABLE file MODIFY COLUMN encoding_type VARCHAR(20) DEFAULT 'UTF-8';"
)
- # Insert data into history
+ try:
cursor.execute(
- "INSERT INTO history (`timestamp`, fileset, oldfileset, log) VALUES (%s, %s, %s, %s)",
- (datetime.now(), 1, 2, 1),
+ "ALTER TABLE file ADD COLUMN `size-r` BIGINT DEFAULT 0, ADD COLUMN `size-rd` BIGINT DEFAULT 0;"
)
-
- # Insert data into transactions
+ except Exception:
cursor.execute(
- "INSERT INTO transactions (`transaction`, fileset) VALUES (%s, %s)",
- (random.randint(1, 100), 1),
+ "ALTER TABLE file MODIFY COLUMN `size-r` BIGINT DEFAULT 0, MODIFY COLUMN `size-rd` BIGINT DEFAULT 0;"
)
-
-
-# for testing locally
-# insert_random_data()
-
-conn.commit()
-conn.close()
+ try:
+ cursor.execute("ALTER TABLE log ADD COLUMN `text` varchar(5000);")
+ except Exception:
+ cursor.execute("ALTER TABLE log MODIFY COLUMN `text` varchar(5000);")
+
+ for index, definition in indices.items():
+ try:
+ cursor.execute(definition)
+ print(f"Created index for '{index}'")
+ except pymysql.Error as err:
+ print(f"Error creating index for '{index}': {err}")
+
+ # Insert random data into tables
+ def random_string(length=10):
+ return "".join(random.choices(string.ascii_letters + string.digits, k=length))
+
+ def insert_random_data():
+ for _ in range(1000):
+ # Insert data into engine
+ cursor.execute(
+ "INSERT INTO engine (name, engineid) VALUES (%s, %s)",
+ (random_string(), random_string()),
+ )
+
+ # Insert data into game
+ cursor.execute(
+ "INSERT INTO game (name, engine, gameid, extra, platform, language) VALUES (%s, %s, %s, %s, %s, %s)",
+ (
+ random_string(),
+ 1,
+ random_string(),
+ random_string(),
+ random_string(),
+ random_string(),
+ ),
+ )
+
+ # Insert data into fileset
+ cursor.execute(
+ "INSERT INTO fileset (game, status, src, `key`, `megakey`, `timestamp`, detection_size) VALUES (%s, %s, %s, %s, %s, %s, %s)",
+ (
+ 1,
+ "user",
+ random_string(),
+ random_string(),
+ random_string(),
+ datetime.now(),
+ random.randint(1, 100),
+ ),
+ )
+
+ # Insert data into file
+ cursor.execute(
+ "INSERT INTO file (name, size, checksum, fileset, detection) VALUES (%s, %s, %s, %s, %s)",
+ (
+ random_string(),
+ random.randint(1000, 10000),
+ random_string(),
+ 1,
+ True,
+ ),
+ )
+
+ # Insert data into filechecksum
+ cursor.execute(
+ "INSERT INTO filechecksum (file, checksize, checktype, checksum) VALUES (%s, %s, %s, %s)",
+ (1, random_string(), random_string(), random_string()),
+ )
+
+ # Insert data into queue
+ cursor.execute(
+ "INSERT INTO queue (time, notes, fileset, userid, commit) VALUES (%s, %s, %s, %s, %s)",
+ (
+ datetime.now(),
+ random_string(),
+ 1,
+ random.randint(1, 100),
+ random_string(),
+ ),
+ )
+
+ # Insert data into log
+ cursor.execute(
+ "INSERT INTO log (`timestamp`, category, user, `text`) VALUES (%s, %s, %s, %s)",
+ (datetime.now(), random_string(), random_string(), random_string()),
+ )
+
+ # Insert data into history
+ cursor.execute(
+ "INSERT INTO history (`timestamp`, fileset, oldfileset, log) VALUES (%s, %s, %s, %s)",
+ (datetime.now(), 1, 2, 1),
+ )
+
+ # Insert data into transactions
+ cursor.execute(
+ "INSERT INTO transactions (`transaction`, fileset) VALUES (%s, %s)",
+ (random.randint(1, 100), 1),
+ )
+
+ # for testing locally
+ # insert_random_data()
+
+ conn.commit()
+ conn.close()
+
+
+if __name__ == "__main__":
+ init_database()
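Rather than truncating ten tables behind disabled foreign-key checks (the step that hung), the clear operation now drops the whole database and calls the new init_database() to rebuild it. A trimmed sketch of that flow, assuming db_connect_root() and init_database() behave as in the diffs above:

def clear_and_rebuild():
    conn, db_name = db_connect_root()   # root-level connection, no database selected
    try:
        with conn.cursor() as cursor:
            cursor.execute(f"DROP DATABASE IF EXISTS {db_name}")
        conn.commit()
        init_database()                 # recreates tables, indices and column migrations
    finally:
        conn.close()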
Commit: 9c32032912453bbb4092c0e2b5553cadcee20331
https://github.com/scummvm/scummvm-sites/commit/9c32032912453bbb4092c0e2b5553cadcee20331
Author: ShivangNagta (shivangnag at gmail.com)
Date: 2025-07-18T22:15:19+02:00
Commit Message:
INTEGRITY: Remove global database connection object from fileset.py, which is never closed.
Changed paths:
fileset.py
diff --git a/fileset.py b/fileset.py
index 3f7bc40..dc91d33 100644
--- a/fileset.py
+++ b/fileset.py
@@ -32,21 +32,6 @@ app = Flask(__name__)
secret_key = os.urandom(24)
-base_dir = os.path.dirname(os.path.abspath(__file__))
-config_path = os.path.join(base_dir, "mysql_config.json")
-with open(config_path) as f:
- mysql_cred = json.load(f)
-
-conn = pymysql.connect(
- host=mysql_cred["servername"],
- user=mysql_cred["username"],
- password=mysql_cred["password"],
- db=mysql_cred["dbname"],
- charset="utf8mb4",
- cursorclass=pymysql.cursors.DictCursor,
- autocommit=False,
-)
-
@app.route("/")
def index():
@@ -348,7 +333,7 @@ def fileset():
html += "<th>Description</th>\n"
html += "<th>Log Text</th>\n"
- related_filesets = get_all_related_filesets(id, conn)
+ related_filesets = get_all_related_filesets(id, connection)
cursor.execute(
f"SELECT * FROM history WHERE fileset IN ({','.join(map(str, related_filesets))}) OR oldfileset IN ({','.join(map(str, related_filesets))})"
@@ -971,9 +956,12 @@ def validate():
del json_response["files"]
json_response["status"] = "no_metadata"
- fileset_id = user_insert_fileset(json_object, ip, conn)
+ conn = db_connect()
+ try:
+ fileset_id = user_insert_fileset(json_object, ip, conn)
+ finally:
+ conn.close()
json_response["fileset"] = fileset_id
- print(f"Response: {json_response}")
return jsonify(json_response)
matched_map = {}
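With the module-level connection gone, fileset.py opens a connection where it needs one and closes it in a finally block, as the /validate change shows. The same pattern wrapped into a small helper is sketched below; this helper is an assumption on my part, the repository itself uses explicit try/finally rather than a context manager:

from contextlib import contextmanager

@contextmanager
def db_session():
    """Open a connection for a single request and always close it."""
    conn = db_connect()              # from db_functions.py
    try:
        yield conn
    finally:
        conn.close()

# with db_session() as conn:
#     fileset_id = user_insert_fileset(json_object, ip, conn)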
Commit: 835ad1fce51c2fecf1535891d2afc79989b53f6c
https://github.com/scummvm/scummvm-sites/commit/835ad1fce51c2fecf1535891d2afc79989b53f6c
Author: ShivangNagta (shivangnag at gmail.com)
Date: 2025-07-18T22:15:19+02:00
Commit Message:
INTEGRITY: Filter manual merge candidates if size mismatch.
Changed paths:
db_functions.py
diff --git a/db_functions.py b/db_functions.py
index 393906b..77fa453 100644
--- a/db_functions.py
+++ b/db_functions.py
@@ -1052,6 +1052,8 @@ def set_process(
del set_to_candidate_dict[set_fileset]
del id_to_fileset_dict[set_fileset]
+ manual_merge_map = defaultdict(list)
+
for fileset_id, candidate_filesets in set_to_candidate_dict.items():
fileset = id_to_fileset_dict[fileset_id]
@@ -1060,16 +1062,6 @@ def set_process(
fileset["name"], candidate_filesets, conn
)
- for candidate_fileset in candidate_filesets:
- with conn.cursor() as cursor:
- cursor.execute(
- "SELECT id FROM fileset WHERE status = 'current' AND id = %s",
- (candidate_fileset),
- )
- result = cursor.fetchone()
- if result:
- candidate_filesets.remove(candidate_fileset)
-
(
fully_matched_filesets,
auto_merged_filesets,
@@ -1086,14 +1078,31 @@ def set_process(
auto_merged_filesets,
manual_merged_filesets,
mismatch_filesets,
+ manual_merge_map,
+ set_to_candidate_dict,
conn,
skiplog,
)
+ # print(manual_merge_map)
+
+ for fileset_id, candidates in manual_merge_map.items():
+ category_text = "Manual Merge Required"
+ log_text = f"Merge Fileset:{fileset_id} manually. Possible matches are: {', '.join(f'Fileset:{id}' for id in candidates)}."
+ manual_merged_filesets += 1
+ # print(candidates)
+ add_manual_merge(
+ candidates,
+ fileset_id,
+ category_text,
+ log_text,
+ log_text,
+ user,
+ conn,
+ )
+
# Final log
with conn.cursor() as cursor:
- cursor.execute("UPDATE fileset SET status = 'partial' WHERE status = 'current'")
-
cursor.execute(
"SELECT COUNT(fileset) from transactions WHERE `transaction` = %s",
(transaction_id,),
@@ -1156,6 +1165,8 @@ def set_perform_match(
auto_merged_filesets,
manual_merged_filesets,
mismatch_filesets,
+ manual_merge_map,
+ set_to_candidate_dict,
conn,
skiplog,
):
@@ -1170,7 +1181,7 @@ def set_perform_match(
)
status = cursor.fetchone()["status"]
if status == "detection":
- update_fileset_status(cursor, matched_fileset_id, "current")
+ update_fileset_status(cursor, matched_fileset_id, "parital")
set_populate_file(fileset, matched_fileset_id, conn, detection)
auto_merged_filesets += 1
if not skiplog:
@@ -1183,6 +1194,9 @@ def set_perform_match(
conn,
)
delete_original_fileset(fileset_id, conn)
+ remove_manual_merge_if_size_mismatch(
+ matched_fileset_id, manual_merge_map, set_to_candidate_dict, conn
+ )
elif status == "partial" or status == "full":
(is_match, unmatched_files) = is_full_checksum_match(
matched_fileset_id, fileset, conn
@@ -1221,7 +1235,7 @@ def set_perform_match(
for candidate_fileset in candidate_filesets:
(is_match, _) = is_full_checksum_match(candidate_fileset, fileset, conn)
if is_match:
- update_fileset_status(cursor, candidate_fileset, "current")
+ update_fileset_status(cursor, candidate_fileset, "partial")
set_populate_file(fileset, candidate_fileset, conn, detection)
auto_merged_filesets += 1
if not skiplog:
@@ -1234,22 +1248,14 @@ def set_perform_match(
conn,
)
delete_original_fileset(fileset_id, conn)
+ remove_manual_merge_if_size_mismatch(
+ candidate_fileset, manual_merge_map, set_to_candidate_dict, conn
+ )
found_match = True
break
if not found_match:
- category_text = "Manual Merge Required"
- log_text = f"Merge Fileset:{fileset_id} manually. Possible matches are: {', '.join(f'Fileset:{id}' for id in candidate_filesets)}."
- manual_merged_filesets += 1
- add_manual_merge(
- candidate_filesets,
- fileset_id,
- category_text,
- log_text,
- log_text,
- user,
- conn,
- )
+ manual_merge_map[fileset_id] = candidate_filesets
return (
fully_matched_filesets,
@@ -1259,6 +1265,98 @@ def set_perform_match(
)
+def remove_manual_merge_if_size_mismatch(
+ child_fileset, manual_merge_map, set_to_candidate_dict, conn
+):
+ with conn.cursor() as cursor:
+ query = """
+ SELECT f.name, f.size
+ FROM fileset fs
+ JOIN file f ON f.fileset = fs.id
+ WHERE fs.id = %s
+ AND f.detection = 1
+ """
+ cursor.execute(query, (child_fileset,))
+ files = cursor.fetchall()
+
+ for parent_fileset, child_list in manual_merge_map.items():
+ if child_fileset not in child_list:
+ continue
+
+ for file in files:
+ if file["size"] == -1:
+ continue
+
+ query = """
+ SELECT f.id
+ FROM fileset fs
+ JOIN file f ON f.fileset = fs.id
+ WHERE fs.id = %s
+ AND f.name = %s
+ AND f.size = %s
+ """
+ cursor.execute(query, (parent_fileset, file["name"], file["size"]))
+ result = cursor.fetchall()
+
+ if not result:
+ remove_manual_merge(
+ child_fileset,
+ parent_fileset,
+ manual_merge_map,
+ set_to_candidate_dict,
+ conn,
+ )
+ break
+
+ for parent_fileset, child_list in set_to_candidate_dict.items():
+ if child_fileset not in child_list:
+ continue
+
+ for file in files:
+ if file["size"] == -1:
+ continue
+
+ query = """
+ SELECT f.id
+ FROM fileset fs
+ JOIN file f ON f.fileset = fs.id
+ WHERE fs.id = %s
+ AND f.name = %s
+ AND f.size = %s
+ """
+ cursor.execute(query, (parent_fileset, file["name"], file["size"]))
+ result = cursor.fetchall()
+
+ if not result:
+ remove_manual_merge(
+ child_fileset,
+ parent_fileset,
+ manual_merge_map,
+ set_to_candidate_dict,
+ conn,
+ )
+ break
+
+
+def remove_manual_merge(
+ child_fileset, parent_fileset, manual_merge_map, set_to_candidate_dict, conn
+):
+ if parent_fileset in manual_merge_map:
+ if child_fileset in manual_merge_map[parent_fileset]:
+ manual_merge_map[parent_fileset].remove(child_fileset)
+ if parent_fileset in set_to_candidate_dict:
+ if child_fileset in set_to_candidate_dict[parent_fileset]:
+ set_to_candidate_dict[parent_fileset].remove(child_fileset)
+
+ with conn.cursor() as cursor:
+ query = """
+ DELETE FROM possible_merges
+ WHERE child_fileset = %s
+ AND parent_fileset = %s
+ """
+ cursor.execute(query, (child_fileset, parent_fileset))
+
+
def add_manual_merge(
child_filesets, parent_fileset, category_text, log_text, print_text, user, conn
):
@@ -1835,15 +1933,18 @@ def set_populate_file(fileset, fileset_id, conn, detection):
filename = os.path.basename(normalised_path(file["name"]))
- if (engine_name == "glk" and file["size"] not in candidate_file_size) and (
- (filename.lower(), file["size"]) in seen_detection_files
- or (
- filename.lower() not in candidate_files
+ if (engine_name == "glk" and file["size"] not in candidate_file_size) or (
+ engine_name != "glk"
+ and (
+ (filename.lower(), file["size"]) in seen_detection_files
or (
- filename.lower() in candidate_files
- and (
- candidate_files[filename.lower()][1] != -1
- and candidate_files[filename.lower()][1] != file["size"]
+ filename.lower() not in candidate_files
+ or (
+ filename.lower() in candidate_files
+ and (
+ candidate_files[filename.lower()][1] != -1
+ and candidate_files[filename.lower()][1] != file["size"]
+ )
)
)
)
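The size-mismatch rule introduced by this commit can be summarised independently of the database: a detection-derived candidate stays in the manual-merge list only if every one of its detection files with a known size has a same-named file of the same size in the set.dat fileset. A minimal in-memory sketch of that rule (illustrative only; names are hypothetical, the real check runs as SQL in remove_manual_merge_if_size_mismatch):

def keep_manual_merge_candidate(candidate_detection_files, set_fileset_files):
    # Both arguments are iterables of (name, size) pairs; size == -1 means unknown.
    available = set(set_fileset_files)
    for name, size in candidate_detection_files:
        if size == -1:
            continue  # unknown sizes cannot be used for filtering
        if (name, size) not in available:
            return False  # no same-named file of that size -> drop the candidate
    return True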
Commit: 37e8a37782d9b904141c25dc03c0bf95edf9f725
https://github.com/scummvm/scummvm-sites/commit/37e8a37782d9b904141c25dc03c0bf95edf9f725
Author: ShivangNagta (shivangnag at gmail.com)
Date: 2025-07-18T22:15:19+02:00
Commit Message:
INTEGRITY: Add metadata for set.dat
Changed paths:
db_functions.py
fileset.py
schema.py
diff --git a/db_functions.py b/db_functions.py
index 77fa453..2e022e3 100644
--- a/db_functions.py
+++ b/db_functions.py
@@ -107,6 +107,7 @@ def insert_fileset(
transaction,
log_text,
conn,
+ set_dat_metadata="",
ip="",
username=None,
skiplog=None,
@@ -162,7 +163,7 @@ def insert_fileset(
return (existing_entry, True)
# $game and $key should not be parsed as a mysql string, hence no quotes
- query = f"INSERT INTO fileset (game, status, src, `key`, megakey, `timestamp`) VALUES ({game}, '{status}', '{src}', {key}, {megakey}, FROM_UNIXTIME(@fileset_time_last))"
+ query = f"INSERT INTO fileset (game, status, src, `key`, megakey, `timestamp`, set_dat_metadata) VALUES ({game}, '{status}', '{src}', {key}, {megakey}, FROM_UNIXTIME(@fileset_time_last), '{escape_string(set_dat_metadata)}')"
fileset_id = -1
with conn.cursor() as cursor:
cursor.execute(query)
@@ -968,6 +969,11 @@ def set_process(
megakey = ""
log_text = f"State {source_status}."
+ set_dat_metadata = ""
+ for meta in fileset:
+ if meta != "rom":
+ set_dat_metadata += meta + " = " + fileset[meta] + " , "
+
(fileset_id, existing) = insert_new_fileset(
fileset,
conn,
@@ -978,8 +984,10 @@ def set_process(
transaction_id,
log_text,
user,
+ set_dat_metadata=set_dat_metadata,
skiplog=skiplog,
)
+
if existing:
continue
@@ -2030,6 +2038,7 @@ def insert_new_fileset(
transaction_id,
log_text,
user,
+ set_dat_metadata="",
ip="",
skiplog=False,
):
@@ -2042,6 +2051,7 @@ def insert_new_fileset(
log_text,
conn,
username=user,
+ set_dat_metadata=set_dat_metadata,
ip=ip,
skiplog=skiplog,
)
diff --git a/fileset.py b/fileset.py
index dc91d33..85bc093 100644
--- a/fileset.py
+++ b/fileset.py
@@ -155,18 +155,32 @@ def fileset():
(id,),
)
row = cursor.fetchone()
- print(row)
if row:
id = row["fileset"]
- cursor.execute(f"SELECT * FROM fileset WHERE id = {id}")
+ cursor.execute("SELECT status FROM fileset WHERE id = %s", (id,))
+ status = cursor.fetchone()["status"]
+
+ if status == "dat":
+ cursor.execute(
+ """SELECT id, game, status, src, `key`, megakey, `delete`, timestamp, set_dat_metadata FROM fileset WHERE id = %s""",
+ (id,),
+ )
+ else:
+ cursor.execute(
+ """SELECT id, game, status, src, `key`, megakey, `delete`, timestamp, detection_size, user_count FROM fileset WHERE id = %s""",
+ (id,),
+ )
+
result = cursor.fetchone()
- print(result)
html += "<h3>Fileset details</h3>"
html += "<table>\n"
if result["game"]:
- cursor.execute(
- f"SELECT game.name as 'game name', engineid, gameid, extra, platform, language FROM fileset JOIN game ON game.id = fileset.game JOIN engine ON engine.id = game.engine WHERE fileset.id = {id}"
- )
+ if status == "dat":
+ query = """SELECT game.name as 'game name', engineid, gameid, extra, platform, language, fileset.set_dat_metadata FROM fileset JOIN game ON game.id = fileset.game JOIN engine ON engine.id = game.engine WHERE fileset.id = %s"""
+ else:
+ query = """SELECT game.name as 'game name', engineid, gameid, extra, platform, language FROM fileset JOIN game ON game.id = fileset.game JOIN engine ON engine.id = game.engine WHERE fileset.id = %s"""
+ print(query)
+ cursor.execute(query, (id,))
result = {**result, **cursor.fetchone()}
else:
# result.pop('key', None)
diff --git a/schema.py b/schema.py
index 826be7b..4b9ba42 100644
--- a/schema.py
+++ b/schema.py
@@ -209,6 +209,15 @@ def init_database():
except Exception:
cursor.execute("ALTER TABLE log MODIFY COLUMN `text` varchar(5000);")
+ try:
+ cursor.execute(
+ "ALTER TABLE fileset ADD COLUMN set_dat_metadata varchar(5000) DEFAULT 'UTF-8';"
+ )
+ except Exception:
+ cursor.execute(
+ "ALTER TABLE fileset MODIFY COLUMN set_dat_metadata varchar(5000) DEFAULT 'UTF-8';"
+ )
+
for index, definition in indices.items():
try:
cursor.execute(definition)
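The set_dat_metadata string added above is a plain flattening of every non-"rom" key of the parsed set.dat entry. For a hypothetical entry it comes out like this (sketch, not part of the commit; the game name is only an example):

fileset = {"name": "comi", "description": "The Curse of Monkey Island", "rom": []}
set_dat_metadata = ""
for meta in fileset:
    if meta != "rom":
        set_dat_metadata += meta + " = " + fileset[meta] + " , "
# -> 'name = comi , description = The Curse of Monkey Island , '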
Commit: 5c90d3d63cdbd9ddfe84dadb0b9abffbf52a8364
https://github.com/scummvm/scummvm-sites/commit/5c90d3d63cdbd9ddfe84dadb0b9abffbf52a8364
Author: ShivangNagta (shivangnag at gmail.com)
Date: 2025-07-18T22:15:19+02:00
Commit Message:
INTEGRITY: Add navbar with logo.
Changed paths:
A static/favicon-16x16.png
A static/favicon-32x32.png
A static/integrity_service_logo_256.png
R favicon-16x16.png
R favicon-32x32.png
R integrity_service_logo_256.png
fileset.py
pagination.py
diff --git a/fileset.py b/fileset.py
index 85bc093..436652d 100644
--- a/fileset.py
+++ b/fileset.py
@@ -42,6 +42,11 @@ def index():
<link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='style.css') }}">
</head>
<body>
+ <nav style="padding: 3px; border-bottom: 1px solid #ccc;">
+ <a href="{{ url_for('index') }}">
+ <img src="{{ url_for('static', filename='integrity_service_logo_256.png') }}" alt="Logo" style="height:60px; vertical-align:middle;">
+ </a>
+ </nav>
<h1>Fileset Database</h1>
<h2>Fileset Actions</h2>
<ul>
@@ -138,6 +143,11 @@ def fileset():
<link rel="stylesheet" type="text/css" href="{{{{ url_for('static', filename='style.css') }}}}">
</head>
<body>
+ <nav style="padding: 3px; border-bottom: 1px solid #ccc;">
+ <a href="{{{{ url_for('index') }}}}">
+ <img src="{{{{ url_for('static', filename='integrity_service_logo_256.png') }}}}" alt="Logo" style="height:60px; vertical-align:middle;">
+ </a>
+ </nav>
<h2><u>Fileset: {id}</u></h2>
<table>
"""
@@ -466,6 +476,11 @@ def match_fileset_route(id):
<link rel="stylesheet" type="text/css" href="{{{{ url_for('static', filename='style.css') }}}}">
</head>
<body>
+ <nav style="padding: 3px; border-bottom: 1px solid #ccc;">
+ <a href="{{{{ url_for('index') }}}}">
+ <img src="{{{{ url_for('static', filename='integrity_service_logo_256.png') }}}}" alt="Logo" style="height:60px; vertical-align:middle;">
+ </a>
+ </nav>
<h2>Matched Filesets for Fileset: {id}</h2>
<table>
<tr>
@@ -553,6 +568,11 @@ def merge_fileset(id):
<link rel="stylesheet" type="text/css" href="{{{{ url_for('static', filename='style.css') }}}}">
</head>
<body>
+ <nav style="padding: 3px; border-bottom: 1px solid #ccc;">
+ <a href="{{{{ url_for('index') }}}}">
+ <img src="{{{{ url_for('static', filename='integrity_service_logo_256.png') }}}}" alt="Logo" style="height:60px; vertical-align:middle;">
+ </a>
+ </nav>
<h2>Search Results for '{search_query}'</h2>
<form method="POST">
<input type="text" name="search" placeholder="Search fileset">
@@ -587,6 +607,11 @@ def merge_fileset(id):
<link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='style.css') }}">
</head>
<body>
+ <nav style="padding: 3px; border-bottom: 1px solid #ccc;">
+ <a href="{{ url_for('index') }}">
+ <img src="{{ url_for('static', filename='integrity_service_logo_256.png') }}" alt="Logo" style="height:60px; vertical-align:middle;">
+ </a>
+ </nav>
<h2>Search Fileset to Merge</h2>
<form method="POST">
<input type="text" name="search" placeholder="Search fileset">
@@ -641,6 +666,11 @@ def possible_merge_filesets(id):
<link rel="stylesheet" type="text/css" href="{{{{ url_for('static', filename='style.css') }}}}">
</head>
<body>
+ <nav style="padding: 3px; border-bottom: 1px solid #ccc;">
+ <a href="{{{{ url_for('index') }}}}">
+ <img src="{{{{ url_for('static', filename='integrity_service_logo_256.png') }}}}" alt="Logo" style="height:60px; vertical-align:middle;">
+ </a>
+ </nav>
<h2>Possible Merges for fileset-'{id}'</h2>
<table>
<tr><th>ID</th><th>Game Name</th><th>Platform</th><th>Language</th><th>Extra</th><th>Details</th><th>Action</th></tr>
@@ -748,6 +778,11 @@ def confirm_merge(id):
<link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='style.css') }}">
</head>
<body>
+ <nav style="padding: 3px; border-bottom: 1px solid #ccc;">
+ <a href="{{ url_for('index') }}">
+ <img src="{{ url_for('static', filename='integrity_service_logo_256.png') }}" alt="Logo" style="height:60px; vertical-align:middle;">
+ </a>
+ </nav>
<h2>Confirm Merge</h2>
<table border="1">
<tr><th>Field</th><th>Source Fileset</th><th>Target Fileset</th></tr>
diff --git a/pagination.py b/pagination.py
index cb8ba3d..28b82f5 100644
--- a/pagination.py
+++ b/pagination.py
@@ -141,6 +141,11 @@ def create_page(
<link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='style.css') }}">
</head>
<body>
+ <nav style="padding: 3px; margin-bottom: 20px; border-bottom: 1px solid #ccc;">
+ <a href="{{ url_for('index') }}">
+ <img src="{{ url_for('static', filename='integrity_service_logo_256.png') }}" alt="Logo" style="height:60px; vertical-align:middle;">
+ </a>
+ </nav>
<form id='filters-form' method='GET' onsubmit='remove_empty_inputs()'>
<table>
"""
diff --git a/favicon-16x16.png b/static/favicon-16x16.png
similarity index 100%
rename from favicon-16x16.png
rename to static/favicon-16x16.png
diff --git a/favicon-32x32.png b/static/favicon-32x32.png
similarity index 100%
rename from favicon-32x32.png
rename to static/favicon-32x32.png
diff --git a/integrity_service_logo_256.png b/static/integrity_service_logo_256.png
similarity index 100%
rename from integrity_service_logo_256.png
rename to static/integrity_service_logo_256.png
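The favicon and logo are moved under static/ so Flask's built-in static route can serve them through url_for('static', ...), which the new templates rely on. A minimal sketch of how the URL is resolved (assuming the app keeps Flask's default static_folder):

from flask import Flask, url_for

app = Flask(__name__)  # serves ./static at /static by default

with app.test_request_context():
    print(url_for("static", filename="integrity_service_logo_256.png"))
    # -> /static/integrity_service_logo_256.png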
Commit: fe850f82ee8a7d8c8a4d87f2b96a0b5ec580b2eb
https://github.com/scummvm/scummvm-sites/commit/fe850f82ee8a7d8c8a4d87f2b96a0b5ec580b2eb
Author: ShivangNagta (shivangnag at gmail.com)
Date: 2025-07-18T22:15:19+02:00
Commit Message:
INTEGRITY: Add modification timestamps for macfiles
Changed paths:
compute_hash.py
diff --git a/compute_hash.py b/compute_hash.py
index 3b0e155..94bf18b 100644
--- a/compute_hash.py
+++ b/compute_hash.py
@@ -4,7 +4,7 @@ import argparse
import struct
import sys
from enum import Enum
-from datetime import datetime, date
+from datetime import datetime, date, timedelta
from collections import defaultdict
class FileType(Enum):
@@ -75,9 +75,10 @@ def get_dirs_at_depth(directory, depth):
if depth == num_sep_this - num_sep:
yield root
-def read_be_32(byte_stream):
+def read_be_32(byte_stream, signed=False):
""" Return unsigned integer of size_in_bits, assuming the data is big-endian """
- (uint,) = struct.unpack(">I", byte_stream[:32//8])
+ format = ">i" if signed else ">I"
+ (uint,) = struct.unpack(format, byte_stream[:32//8])
return uint
def read_be_16(byte_stream):
@@ -534,7 +535,6 @@ def compute_hash_of_dirs(root_directory, depth, size=0, limit_timestamps_date=No
for filepath in filtered_file_map:
file_collection[filepath] = file_classification(filepath)
-
# Remove extra entries of macfiles to avoid extra checksum calculation in form of non mac files
# Checksum for both the forks are calculated using a single file, so other files should be removed from the collection
file_filter(file_collection)
@@ -557,6 +557,70 @@ def compute_hash_of_dirs(root_directory, depth, size=0, limit_timestamps_date=No
return res
+def extract_macbin_mtime(file_byte_stream):
+ """
+ Returns modification time of macbinary file from the header.
+ Doc - +$5f / 4: modification date/time.
+ Doc - Timestamps are unsigned 32-bit values indicating the time in seconds since midnight on Jan 1, 1904, in local time.
+ """
+ macbin_epoch = datetime(1904, 1, 1)
+ header = file_byte_stream[:128]
+ macbin_seconds = read_be_32(header[0x5f:])
+ return (macbin_epoch + timedelta(seconds=macbin_seconds)).date()
+
+
+def extract_mtime_appledouble(file_byte_stream):
+ """
+ Returns modification time of appledouble file.
+ Doc 1 - The File Dates Info entry (ID=8) consists of the file creation, modification, backup
+ and access times (see Figure 2-1), stored as a signed number of seconds before
+ or after 12:00 a.m. (midnight), January 1, 2000 Greenwich Mean Time (GMT)
+
+ Doc 2 -
+ struct ASFileDates /* entry ID 8, file dates info */
+ {
+ sint32 create; /* file creation date/time */
+ sint32 modify; /* last modification date/time */
+ sint32 backup; /* last backup date/time */
+ sint32 access; /* last access date/time */
+ }; /* ASFileDates */
+ """
+ entry_count = read_be_16(file_byte_stream[24:])
+ for entry in range(entry_count):
+ start_index = 26 + entry*12
+ id = read_be_32(file_byte_stream[start_index:])
+ offset = read_be_32(file_byte_stream[start_index+4:])
+ length = read_be_32(file_byte_stream[start_index+8:])
+
+ if id == 8:
+ date_info_data = file_byte_stream[offset:offset + length]
+ if len(date_info_data) < 16:
+ raise ValueError("FileDatesInfo block is too short.")
+ appledouble_epoch = datetime(2000, 1, 1)
+ modify_seconds = read_be_32(date_info_data[4:8], signed=True)
+ return (appledouble_epoch + timedelta(seconds=modify_seconds)).date()
+
+ return None
+
+
+def macfile_timestamp(filepath):
+ """
+ Returns the modification times for the mac file from their finderinfo.
+ If the file is not a macfile, it returns None
+ """
+ with open(filepath, "rb") as f:
+ data = f.read()
+ # Macbinary
+ if is_macbin(filepath):
+ return extract_macbin_mtime(data)
+
+ # Appledouble
+ if is_appledouble_rsrc(filepath) or is_appledouble_in_dot_(filepath) or is_appledouble_in_macosx(filepath):
+ return extract_mtime_appledouble(data)
+
+ return None
+
+
def validate_date(date_str):
"""
Confirms if the user provided timestamp is in a valid format.
@@ -579,12 +643,15 @@ def filter_files_by_timestamp(files, limit_timestamps_date):
"""
filtered_file_map = defaultdict(str)
+
if limit_timestamp_date is not None:
user_date = validate_date(limit_timestamps_date)
today = date.today()
for filepath in files:
- mtime = datetime.fromtimestamp(os.path.getmtime(filepath)).date()
+ mtime = macfile_timestamp(filepath)
+ if mtime is None:
+ mtime = datetime.fromtimestamp(os.path.getmtime(filepath)).date()
if limit_timestamps_date is None or (limit_timestamps_date is not None and (mtime <= user_date or mtime == today)):
filtered_file_map[filepath] = str(mtime)
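As a standalone restatement of extract_macbin_mtime above (sketch only; offsets and epoch are as documented in the commit): MacBinary stores the modification time at header offset 0x5f as an unsigned 32-bit count of seconds since 1904-01-01, local time.

import struct
from datetime import datetime, timedelta

def macbin_mtime(header: bytes):
    # +$5f / 4: modification date/time, seconds since 1904-01-01 (local time)
    (seconds,) = struct.unpack(">I", header[0x5F:0x63])
    return (datetime(1904, 1, 1) + timedelta(seconds=seconds)).date()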
Commit: 9c039e379ae8ac187a49afd7020d40a97441758e
https://github.com/scummvm/scummvm-sites/commit/9c039e379ae8ac187a49afd7020d40a97441758e
Author: ShivangNagta (shivangnag at gmail.com)
Date: 2025-07-18T22:15:19+02:00
Commit Message:
INTEGRITY: Add punycode encoding for scan utility.
Changed paths:
compute_hash.py
diff --git a/compute_hash.py b/compute_hash.py
index 94bf18b..626e622 100644
--- a/compute_hash.py
+++ b/compute_hash.py
@@ -18,6 +18,8 @@ class FileType(Enum):
script_version = "0.1"
+SPECIAL_SYMBOLS = '/":*|\\?%<>\x7f'
+
# CRC table
CRC16_XMODEM_TABLE = [
0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
@@ -75,6 +77,83 @@ def get_dirs_at_depth(directory, depth):
if depth == num_sep_this - num_sep:
yield root
+
+def my_escape_string(s: str) -> str:
+ """
+ Escape strings
+
+ Escape the following:
+ - escape char: \x81
+ - unallowed filename chars: https://en.wikipedia.org/wiki/Filename#Reserved_characters_and_words
+ - control chars < 0x20
+ """
+ new_name = ""
+ for char in s:
+ if char == "\x81":
+ new_name += "\x81\x79"
+ elif char in SPECIAL_SYMBOLS or ord(char) < 0x20:
+ new_name += "\x81" + chr(0x80 + ord(char))
+ else:
+ new_name += char
+ return new_name
+
+
+def encode_punycode(orig):
+ """
+ Punyencode strings
+
+ - escape special characters and
+ - ensure filenames can't end in a space or dotif temp == None:
+ """
+ s = my_escape_string(orig)
+ encoded = s.encode("punycode").decode("ascii")
+ # punyencoding adds an '-' at the end when there are no special chars
+ # don't use it for comparing
+ compare = encoded
+ if encoded.endswith("-"):
+ compare = encoded[:-1]
+ if orig != compare or compare[-1] in " .":
+ return "xn--" + encoded
+ return orig
+
+
+def punycode_need_encode(orig):
+ """
+ A filename needs to be punyencoded when it:
+
+ - contains a char that should be escaped or
+ - ends with a dot or a space.
+ """
+ if len(orig) > 4 and orig[:4] == "xn--":
+ return False
+ if not all((0x20 <= ord(c) < 0x80) and c not in SPECIAL_SYMBOLS for c in orig):
+ return True
+ if orig[-1] in " .":
+ return True
+ return False
+
+
+def split_path_recursive(path):
+ parts = []
+ while True:
+ head, tail = os.path.split(path)
+ if tail:
+ parts.insert(0, tail)
+ path = head
+ else:
+ if head:
+ parts.insert(0, head)
+ break
+ return parts
+
+def encode_path_components(filepath):
+ """
+ Puny encodes all separate components of filepath
+ """
+ parts = split_path_recursive(filepath)
+ encoded_parts = [encode_punycode(p) if punycode_need_encode(p) else p for p in parts]
+ return os.path.join(*encoded_parts)
+
def read_be_32(byte_stream, signed=False):
""" Return unsigned integer of size_in_bits, assuming the data is big-endian """
format = ">i" if signed else ">I"
@@ -202,25 +281,6 @@ def macbin_get_datafork(file_byte_stream):
(datalen,) = struct.unpack(">I", file_byte_stream[0x53:0x57])
return file_byte_stream[0x80: 0x80 + datalen]
-def is_appledouble(file_byte_stream):
- """
- Appledouble Structure -
-
- Header:
- +$00 / 4: signature (0x00 0x05 0x16 0x00)
- +$04 / 4: version (0x00 0x01 0x00 0x00 (v1) -or- 0x00 0x02 0x00 0x00 (v2))
- +$08 /16: home file system string (v1) -or- zeroes (v2)
- +$18 / 2: number of entries
-
- Entries:
- +$00 / 4: entry ID (1-15)
- +$04 / 4: offset to data from start of file
- +$08 / 4: length of entry in bytes; may be zero
- """
- if (not file_byte_stream or read_be_32(file_byte_stream) != 0x00051607):
- return False
-
- return True
def appledouble_get_resfork_data(file_byte_stream):
""" Returns the resource fork's data section as bytes, size of resource fork (size-r) and size of data section of resource fork (size-rd) of an appledouble file"""
@@ -672,6 +732,7 @@ def create_dat_file(hash_of_dirs, path, checksum_size=0):
for hash_of_dir in hash_of_dirs:
file.write("game (\n")
for filename, (hashes, size, size_r, size_rd, timestamp) in hash_of_dir.items():
+ filename = encode_path_components(filename)
data = f"name \"{filename}\" size {size} size-r {size_r} size-rd {size_rd} timestamp {timestamp}"
for key, value in hashes:
data += f" {key} {value}"
Commit: 9a3f55602ccadbd5872675d89202831fa347955b
https://github.com/scummvm/scummvm-sites/commit/9a3f55602ccadbd5872675d89202831fa347955b
Author: ShivangNagta (shivangnag at gmail.com)
Date: 2025-07-18T22:15:19+02:00
Commit Message:
INTEGRITY: Fix the navbar on top.
Changed paths:
fileset.py
pagination.py
diff --git a/fileset.py b/fileset.py
index 436652d..c8e9cf7 100644
--- a/fileset.py
+++ b/fileset.py
@@ -42,12 +42,12 @@ def index():
<link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='style.css') }}">
</head>
<body>
- <nav style="padding: 3px; border-bottom: 1px solid #ccc;">
+ <nav style="position: fixed; top: 0; left: 0; right: 0; background: white; padding: 3px; border-bottom: 1px solid #ccc;">
<a href="{{ url_for('index') }}">
<img src="{{ url_for('static', filename='integrity_service_logo_256.png') }}" alt="Logo" style="height:60px; vertical-align:middle;">
</a>
</nav>
- <h1>Fileset Database</h1>
+ <h1 style="margin-top: 80px;">Fileset Database</h1>
<h2>Fileset Actions</h2>
<ul>
<li><a href="{{ url_for('fileset') }}">Fileset</a></li>
@@ -143,12 +143,12 @@ def fileset():
<link rel="stylesheet" type="text/css" href="{{{{ url_for('static', filename='style.css') }}}}">
</head>
<body>
- <nav style="padding: 3px; border-bottom: 1px solid #ccc;">
+ <nav style="position: fixed; top: 0; left: 0; right: 0; background: white; padding: 3px; border-bottom: 1px solid #ccc;">
<a href="{{{{ url_for('index') }}}}">
<img src="{{{{ url_for('static', filename='integrity_service_logo_256.png') }}}}" alt="Logo" style="height:60px; vertical-align:middle;">
</a>
</nav>
- <h2><u>Fileset: {id}</u></h2>
+ <h2 style="margin-top: 80px;"><u>Fileset: {id}</u></h2>
<table>
"""
html += f"<button type='button' onclick=\"location.href='/fileset/{id}/merge'\">Manual Merge</button>"
@@ -476,12 +476,12 @@ def match_fileset_route(id):
<link rel="stylesheet" type="text/css" href="{{{{ url_for('static', filename='style.css') }}}}">
</head>
<body>
- <nav style="padding: 3px; border-bottom: 1px solid #ccc;">
+ <nav style="position: fixed; top: 0; left: 0; right: 0; background: white; padding: 3px; border-bottom: 1px solid #ccc;">
<a href="{{{{ url_for('index') }}}}">
<img src="{{{{ url_for('static', filename='integrity_service_logo_256.png') }}}}" alt="Logo" style="height:60px; vertical-align:middle;">
</a>
</nav>
- <h2>Matched Filesets for Fileset: {id}</h2>
+ <h2 style="margin-top: 80px;">Matched Filesets for Fileset: {id}</h2>
<table>
<tr>
<th>Fileset ID</th>
@@ -568,12 +568,12 @@ def merge_fileset(id):
<link rel="stylesheet" type="text/css" href="{{{{ url_for('static', filename='style.css') }}}}">
</head>
<body>
- <nav style="padding: 3px; border-bottom: 1px solid #ccc;">
+ <nav style="position: fixed; top: 0; left: 0; right: 0; background: white; padding: 3px; border-bottom: 1px solid #ccc;">
<a href="{{{{ url_for('index') }}}}">
<img src="{{{{ url_for('static', filename='integrity_service_logo_256.png') }}}}" alt="Logo" style="height:60px; vertical-align:middle;">
</a>
</nav>
- <h2>Search Results for '{search_query}'</h2>
+ <h2 style="margin-top: 80px;">Search Results for '{search_query}'</h2>
<form method="POST">
<input type="text" name="search" placeholder="Search fileset">
<input type="submit" value="Search">
@@ -607,12 +607,12 @@ def merge_fileset(id):
<link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='style.css') }}">
</head>
<body>
- <nav style="padding: 3px; border-bottom: 1px solid #ccc;">
+ <nav style="position: fixed; top: 0; left: 0; right: 0; background: white; padding: 3px; border-bottom: 1px solid #ccc;">
<a href="{{ url_for('index') }}">
<img src="{{ url_for('static', filename='integrity_service_logo_256.png') }}" alt="Logo" style="height:60px; vertical-align:middle;">
</a>
</nav>
- <h2>Search Fileset to Merge</h2>
+ <h2 style="margin-top: 80px;">Search Fileset to Merge</h2>
<form method="POST">
<input type="text" name="search" placeholder="Search fileset">
<input type="submit" value="Search">
@@ -666,12 +666,12 @@ def possible_merge_filesets(id):
<link rel="stylesheet" type="text/css" href="{{{{ url_for('static', filename='style.css') }}}}">
</head>
<body>
- <nav style="padding: 3px; border-bottom: 1px solid #ccc;">
+ <nav style="position: fixed; top: 0; left: 0; right: 0; background: white; padding: 3px; border-bottom: 1px solid #ccc;">
<a href="{{{{ url_for('index') }}}}">
<img src="{{{{ url_for('static', filename='integrity_service_logo_256.png') }}}}" alt="Logo" style="height:60px; vertical-align:middle;">
</a>
</nav>
- <h2>Possible Merges for fileset-'{id}'</h2>
+ <h2 style="margin-top: 80px;">Possible Merges for fileset-'{id}'</h2>
<table>
<tr><th>ID</th><th>Game Name</th><th>Platform</th><th>Language</th><th>Extra</th><th>Details</th><th>Action</th></tr>
"""
@@ -778,12 +778,12 @@ def confirm_merge(id):
<link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='style.css') }}">
</head>
<body>
- <nav style="padding: 3px; border-bottom: 1px solid #ccc;">
+ <nav style="position: fixed; top: 0; left: 0; right: 0; background: white; padding: 3px; border-bottom: 1px solid #ccc;">
<a href="{{ url_for('index') }}">
<img src="{{ url_for('static', filename='integrity_service_logo_256.png') }}" alt="Logo" style="height:60px; vertical-align:middle;">
</a>
</nav>
- <h2>Confirm Merge</h2>
+ <h2 style="margin-top: 80px;">Confirm Merge</h2>
<table border="1">
<tr><th>Field</th><th>Source Fileset</th><th>Target Fileset</th></tr>
"""
diff --git a/pagination.py b/pagination.py
index 28b82f5..091384c 100644
--- a/pagination.py
+++ b/pagination.py
@@ -141,13 +141,13 @@ def create_page(
<link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='style.css') }}">
</head>
<body>
- <nav style="padding: 3px; margin-bottom: 20px; border-bottom: 1px solid #ccc;">
+ <nav style="position: fixed; top: 0; left: 0; right: 0; background: white; padding: 3px; border-bottom: 1px solid #ccc;">
<a href="{{ url_for('index') }}">
<img src="{{ url_for('static', filename='integrity_service_logo_256.png') }}" alt="Logo" style="height:60px; vertical-align:middle;">
</a>
</nav>
<form id='filters-form' method='GET' onsubmit='remove_empty_inputs()'>
-<table>
+<table style="margin-top: 80px;">
"""
if not results:
return "No results for given filters"
Commit: b610e5f45f90e418e2e67234751f14435ed70f97
https://github.com/scummvm/scummvm-sites/commit/b610e5f45f90e418e2e67234751f14435ed70f97
Author: ShivangNagta (shivangnag at gmail.com)
Date: 2025-07-18T22:15:19+02:00
Commit Message:
INTEGRITY: Limit match fileset to 1 in remove_manual_merge_if_size_mismatch
Changed paths:
db_functions.py
diff --git a/db_functions.py b/db_functions.py
index 2e022e3..4c2d927 100644
--- a/db_functions.py
+++ b/db_functions.py
@@ -1179,7 +1179,7 @@ def set_perform_match(
skiplog,
):
"""
- TODO
+ "Performs matching for set.dat"
"""
with conn.cursor() as cursor:
if len(candidate_filesets) == 1:
@@ -1189,11 +1189,11 @@ def set_perform_match(
)
status = cursor.fetchone()["status"]
if status == "detection":
- update_fileset_status(cursor, matched_fileset_id, "parital")
+ update_fileset_status(cursor, matched_fileset_id, "partial")
set_populate_file(fileset, matched_fileset_id, conn, detection)
auto_merged_filesets += 1
if not skiplog:
- set_log_matched_fileset(
+ log_matched_fileset(
src,
fileset_id,
matched_fileset_id,
@@ -1247,7 +1247,7 @@ def set_perform_match(
set_populate_file(fileset, candidate_fileset, conn, detection)
auto_merged_filesets += 1
if not skiplog:
- set_log_matched_fileset(
+ log_matched_fileset(
src,
fileset_id,
candidate_fileset,
@@ -1287,63 +1287,37 @@ def remove_manual_merge_if_size_mismatch(
cursor.execute(query, (child_fileset,))
files = cursor.fetchall()
- for parent_fileset, child_list in manual_merge_map.items():
- if child_fileset not in child_list:
- continue
-
- for file in files:
- if file["size"] == -1:
- continue
-
- query = """
- SELECT f.id
- FROM fileset fs
- JOIN file f ON f.fileset = fs.id
- WHERE fs.id = %s
- AND f.name = %s
- AND f.size = %s
- """
- cursor.execute(query, (parent_fileset, file["name"], file["size"]))
- result = cursor.fetchall()
-
- if not result:
- remove_manual_merge(
- child_fileset,
- parent_fileset,
- manual_merge_map,
- set_to_candidate_dict,
- conn,
- )
- break
-
- for parent_fileset, child_list in set_to_candidate_dict.items():
- if child_fileset not in child_list:
- continue
-
- for file in files:
- if file["size"] == -1:
+ for possible_removals in [manual_merge_map, set_to_candidate_dict]:
+ for parent_fileset, child_list in possible_removals.items():
+ if child_fileset not in child_list:
continue
- query = """
- SELECT f.id
- FROM fileset fs
- JOIN file f ON f.fileset = fs.id
- WHERE fs.id = %s
- AND f.name = %s
- AND f.size = %s
- """
- cursor.execute(query, (parent_fileset, file["name"], file["size"]))
- result = cursor.fetchall()
-
- if not result:
- remove_manual_merge(
- child_fileset,
- parent_fileset,
- manual_merge_map,
- set_to_candidate_dict,
- conn,
- )
- break
+ for file in files:
+ if file["size"] == -1:
+ continue
+
+ query = """
+ SELECT fs.id
+ FROM fileset fs
+ JOIN file f ON f.fileset = fs.id
+ WHERE fs.id = %s
+ AND REGEXP_REPLACE(f.name, '^.*[\\\\/]', '') = %s
+ AND f.size = %s
+ LIMIT 1
+ """
+ filename = os.path.basename(normalised_path(file["name"]))
+ cursor.execute(query, (parent_fileset, filename, file["size"]))
+ result = cursor.fetchall()
+
+ if not result:
+ remove_manual_merge(
+ child_fileset,
+ parent_fileset,
+ manual_merge_map,
+ set_to_candidate_dict,
+ conn,
+ )
+ break
def remove_manual_merge(
@@ -2063,21 +2037,20 @@ def insert_new_fileset(
cursor.execute("SELECT @file_last AS file_id")
file_id = cursor.fetchone()["file_id"]
for key, value in file.items():
- if key not in ["name", "size", "size-r", "size-rd", "sha1", "crc"]:
+ if key not in [
+ "name",
+ "size",
+ "size-r",
+ "size-rd",
+ "sha1",
+ "crc",
+ "modification-time",
+ ]:
insert_filechecksum(file, key, file_id, conn)
return (fileset_id, existing)
def log_matched_fileset(src, fileset_last, fileset_id, state, user, conn):
- category_text = f"Matched from {src}"
- log_text = f"Matched Fileset:{fileset_id}. State {state}."
- log_last = create_log(
- escape_string(category_text), user, escape_string(log_text), conn
- )
- update_history(fileset_last, fileset_id, conn, log_last)
-
-
-def set_log_matched_fileset(src, fileset_last, fileset_id, state, user, conn):
category_text = f"Matched from {src}"
log_text = (
f"Matched Fileset:{fileset_last} with Fileset:{fileset_id}. State {state}."
Commit: 6a6847c2bf3aeeb8942beb5f64cea4b0d83f4e7b
https://github.com/scummvm/scummvm-sites/commit/6a6847c2bf3aeeb8942beb5f64cea4b0d83f4e7b
Author: ShivangNagta (shivangnag at gmail.com)
Date: 2025-07-18T22:15:19+02:00
Commit Message:
INTEGRITY: Improve console logging with progress update.
Changed paths:
db_functions.py
diff --git a/db_functions.py b/db_functions.py
index 4c2d927..bb6a2a8 100644
--- a/db_functions.py
+++ b/db_functions.py
@@ -9,11 +9,13 @@ from pymysql.converters import escape_string
from collections import defaultdict
import re
import copy
+import sys
SPECIAL_SYMBOLS = '/":*|\\?%<>\x7f'
def db_connect():
+ console_log("Connecting to the Database.")
base_dir = os.path.dirname(os.path.abspath(__file__))
config_path = os.path.join(base_dir, "mysql_config.json")
with open(config_path) as f:
@@ -28,7 +30,7 @@ def db_connect():
cursorclass=pymysql.cursors.DictCursor,
autocommit=False,
)
-
+ console_log(f"Connected to Database - {mysql_cred['dbname']}")
return conn
@@ -526,12 +528,17 @@ def db_insert(data_arr, username=None, skiplog=False):
transaction_id = temp + 1
category_text = f"Uploaded from {src}"
- log_text = f"Started loading DAT file, size {os.path.getsize(filepath)}, author {author}, version {version}. State {status}. Transaction: {transaction_id}"
+ log_text = f"Started loading DAT file {filepath}, size {os.path.getsize(filepath)}, author {author}, version {version}. State {status}. Transaction: {transaction_id}"
user = f"cli:{getpass.getuser()}" if username is None else username
create_log(escape_string(category_text), user, escape_string(log_text), conn)
+ console_log(log_text)
+ console_log_total_filesets(filepath)
+
+ fileset_count = 1
for fileset in game_data:
+ console_log_detection(fileset_count)
key = calc_key(fileset)
megakey = calc_megakey(fileset)
@@ -555,7 +562,7 @@ def db_insert(data_arr, username=None, skiplog=False):
if existing_entry is not None:
log_text = f"Skipping Entry as similar entry already exsits - Fileset:{existing_entry['id']}. Skpped entry details - engineid = {engineid}, gameid = {gameid}, platform = {platform}, language = {lang}"
create_log("Warning", user, escape_string(log_text), conn)
- print(log_text)
+ console_log(log_text)
continue
insert_game(
@@ -594,6 +601,8 @@ def db_insert(data_arr, username=None, skiplog=False):
if key not in ["name", "size", "size-r", "size-rd", "sha1", "crc"]:
insert_filechecksum(file, key, file_id, conn)
+ fileset_count += 1
+
if detection:
conn.cursor().execute(
"UPDATE fileset SET status = 'obsolete' WHERE `timestamp` != FROM_UNIXTIME(@fileset_time_last) AND status = 'detection'"
@@ -607,6 +616,7 @@ def db_insert(data_arr, username=None, skiplog=False):
fileset_insertion_count = cur.fetchone()["COUNT(fileset)"]
category_text = f"Uploaded from {src}"
log_text = f"Completed loading DAT file, filename {filepath}, size {os.path.getsize(filepath)}, author {author}, version {version}. State {status}. Number of filesets: {fileset_insertion_count}. Transaction: {transaction_id}"
+ console_log(log_text)
except Exception as e:
print("Inserting failed:", e)
else:
@@ -871,8 +881,9 @@ def match_fileset(data_arr, username=None, skiplog=False):
transaction_id = transaction_id + 1 if transaction_id else 1
category_text = f"Uploaded from {src}"
- log_text = f"Started loading DAT file, size {os.path.getsize(filepath)}, author {author}, version {version}. State {source_status}. Transaction: {transaction_id}"
-
+ log_text = f"Started loading DAT file {filepath}, size {os.path.getsize(filepath)}, author {author}, version {version}. State {source_status}. Transaction: {transaction_id}"
+ console_log(log_text)
+ console_log_total_filesets(filepath)
user = f"cli:{getpass.getuser()}" if username is None else username
create_log(escape_string(category_text), user, escape_string(log_text), conn)
@@ -941,6 +952,9 @@ def set_process(
mismatch_filesets = 0
dropped_early_no_candidate = 0
dropped_early_single_candidate_multiple_sets = 0
+
+ fileset_count = 0
+
# A mapping from set filesets to candidate filesets list
set_to_candidate_dict = defaultdict(list)
id_to_fileset_dict = defaultdict(dict)
@@ -995,12 +1009,12 @@ def set_process(
engine_name = fileset["sourcefile"].split("-")[0]
if engine_name == "glk":
- candidate_filesets = set_glk_filter_candidate_filesets(
- fileset_id, fileset, transaction_id, engine_name, conn
+ (candidate_filesets, fileset_count) = set_glk_filter_candidate_filesets(
+ fileset_id, fileset, fileset_count, transaction_id, engine_name, conn
)
else:
- candidate_filesets = set_filter_candidate_filesets(
- fileset_id, fileset, transaction_id, conn
+ (candidate_filesets, fileset_count) = set_filter_candidate_filesets(
+ fileset_id, fileset, fileset_count, transaction_id, conn
)
# Mac files in set.dat are not represented properly and they won't find a candidate fileset for a match, so we can drop them.
@@ -1016,10 +1030,18 @@ def set_process(
)
dropped_early_no_candidate += 1
delete_original_fileset(fileset_id, conn)
-
id_to_fileset_dict[fileset_id] = fileset
set_to_candidate_dict[fileset_id].extend(candidate_filesets)
+ console_message = "Candidate filtering finished."
+ console_log(console_message)
+ console_message = (
+ f"{dropped_early_no_candidate} Filesets Dropped - No candidates found."
+ )
+ console_log(console_message)
+ console_message = "Looking for duplicates..."
+ console_log(console_message)
+
# Remove all such filesets, which have many to one mapping with a single candidate, those are extra variants.
value_to_keys = defaultdict(list)
for set_fileset, candidates in set_to_candidate_dict.items():
@@ -1052,6 +1074,7 @@ def set_process(
fileset["description"] if "description" in fileset else ""
)
log_text = f"Drop fileset, multiple filesets mapping to single detection. Name: {fileset_name}, Description: {fileset_description}. Clashed with Fileset:{candidate} ({engine}:{gameid}-{platform}-{language})"
+ console_log(log_text)
create_log(
escape_string(category_text), user, escape_string(log_text), conn
)
@@ -1062,7 +1085,9 @@ def set_process(
manual_merge_map = defaultdict(list)
+ match_count = 1
for fileset_id, candidate_filesets in set_to_candidate_dict.items():
+ console_log_matching(match_count)
fileset = id_to_fileset_dict[fileset_id]
# Filter by platform to reduce manual merge
@@ -1092,21 +1117,15 @@ def set_process(
skiplog,
)
- # print(manual_merge_map)
+ match_count += 1
+ console_log("Matching performed.")
for fileset_id, candidates in manual_merge_map.items():
category_text = "Manual Merge Required"
log_text = f"Merge Fileset:{fileset_id} manually. Possible matches are: {', '.join(f'Fileset:{id}' for id in candidates)}."
manual_merged_filesets += 1
- # print(candidates)
add_manual_merge(
- candidates,
- fileset_id,
- category_text,
- log_text,
- log_text,
- user,
- conn,
+ candidates, fileset_id, category_text, log_text, user, conn, log_text
)
# Final log
@@ -1121,6 +1140,7 @@ def set_process(
create_log(escape_string(category_text), user, escape_string(log_text), conn)
category_text = "Upload information"
log_text = f"Number of filesets: {fileset_insertion_count}. Filesets automatically merged: {auto_merged_filesets}. Filesets dropped early (no candidate) - {dropped_early_no_candidate}. Filesets dropped early (mapping to single detection) - {dropped_early_single_candidate_multiple_sets}. Filesets requiring manual merge: {manual_merged_filesets}. Partial/Full filesets already present: {fully_matched_filesets}. Partial/Full filesets with mismatch {mismatch_filesets}."
+ console_log(log_text)
create_log(escape_string(category_text), user, escape_string(log_text), conn)
@@ -1225,14 +1245,13 @@ def set_perform_match(
else:
category_text = "Mismatch"
log_text = f"Fileset:{fileset_id} mismatched with Fileset:{matched_fileset_id} with status:{status}. Try manual merge."
- print_text = f"Merge Fileset:{fileset_id} manually with Fileset:{matched_fileset_id}. Unmatched files: {len(unmatched_files)}."
+ # print_text = f"Merge Fileset:{fileset_id} manually with Fileset:{matched_fileset_id}. Unmatched files: {len(unmatched_files)}."
mismatch_filesets += 1
add_manual_merge(
[matched_fileset_id],
fileset_id,
category_text,
log_text,
- print_text,
user,
conn,
)
@@ -1340,7 +1359,7 @@ def remove_manual_merge(
def add_manual_merge(
- child_filesets, parent_fileset, category_text, log_text, print_text, user, conn
+ child_filesets, parent_fileset, category_text, log_text, user, conn, print_text=None
):
"""
Adds the manual merge entries to a table called possible_merges.
@@ -1356,7 +1375,8 @@ def add_manual_merge(
cursor.execute(query, (child_fileset, parent_fileset))
create_log(escape_string(category_text), user, escape_string(log_text), conn)
- print(print_text)
+ if print_text:
+ print(print_text)
def is_full_checksum_match(candidate_fileset, fileset, conn):
@@ -1395,14 +1415,15 @@ def is_full_checksum_match(candidate_fileset, fileset, conn):
def set_glk_filter_candidate_filesets(
- fileset_id, fileset, transaction_id, engine_name, conn
+ fileset_id, fileset, fileset_count, transaction_id, engine_name, conn
):
"""
Returns a list of candidate filesets for glk engines that can be merged
"""
with conn.cursor() as cursor:
# Returns those filesets which have all detection files matching in the set fileset filtered by engine, file name and file size(if not -1) sorted in descending order of matches
-
+ fileset_count += 1
+ console_log_candidate_filtering(fileset_count)
query = """
WITH candidate_fileset AS (
SELECT fs.id AS fileset_id, f.size
@@ -1469,16 +1490,19 @@ def set_glk_filter_candidate_filesets(
for row in rows:
candidates.append(row["fileset_id"])
- return candidates
+ return (candidates, fileset_count)
-def set_filter_candidate_filesets(fileset_id, fileset, transaction_id, conn):
+def set_filter_candidate_filesets(
+ fileset_id, fileset, fileset_count, transaction_id, conn
+):
"""
Returns a list of candidate filesets that can be merged
"""
with conn.cursor() as cursor:
# Returns those filesets which have all detection files matching in the set fileset filtered by engine, file name and file size(if not -1) sorted in descending order of matches
-
+ fileset_count += 1
+ console_log_candidate_filtering(fileset_count)
query = """
WITH candidate_fileset AS (
SELECT fs.id AS fileset_id, f.name, f.size
@@ -1536,7 +1560,7 @@ def set_filter_candidate_filesets(fileset_id, fileset, transaction_id, conn):
for row in rows:
candidates.append(row["fileset_id"])
- return candidates
+ return (candidates, fileset_count)
def process_fileset(
@@ -2265,3 +2289,33 @@ def add_usercount(fileset, conn):
cursor.execute(
f"UPDATE fileset SET status = 'ReadyForReview' WHERE id = {fileset}"
)
+
+
+def console_log(message):
+ sys.stdout.write(" " * 50 + "\r")
+ sys.stdout.flush()
+ print(message)
+
+
+def console_log_candidate_filtering(fileset_count):
+ sys.stdout.write(f"Filtering Candidates - Fileset {fileset_count}\r")
+ sys.stdout.flush()
+
+
+def console_log_matching(fileset_count):
+ sys.stdout.write(f"Performing Match - Fileset {fileset_count}\r")
+ sys.stdout.flush()
+
+
+def console_log_detection(fileset_count):
+ sys.stdout.write(f"Processing - Fileset {fileset_count}\r")
+ sys.stdout.flush()
+
+
+def console_log_total_filesets(file_path):
+ count = 0
+ with open(file_path, "r") as f:
+ for line in f:
+ if line.strip().startswith("game ("):
+ count += 1
+ print(f"Total filesets present - {count}.")
Commit: fa13ffdef0af213a0342fc7cb8de1c290fd18c57
https://github.com/scummvm/scummvm-sites/commit/fa13ffdef0af213a0342fc7cb8de1c290fd18c57
Author: ShivangNagta (shivangnag at gmail.com)
Date: 2025-07-18T22:15:19+02:00
Commit Message:
INTEGRITY: Remove custom recursive path split function.
Changed paths:
compute_hash.py
diff --git a/compute_hash.py b/compute_hash.py
index 626e622..d63b22f 100644
--- a/compute_hash.py
+++ b/compute_hash.py
@@ -132,25 +132,11 @@ def punycode_need_encode(orig):
return True
return False
-
-def split_path_recursive(path):
- parts = []
- while True:
- head, tail = os.path.split(path)
- if tail:
- parts.insert(0, tail)
- path = head
- else:
- if head:
- parts.insert(0, head)
- break
- return parts
-
def encode_path_components(filepath):
"""
Puny encodes all separate components of filepath
"""
- parts = split_path_recursive(filepath)
+ parts = [i for i in filepath.split(os.sep) if i ]
encoded_parts = [encode_punycode(p) if punycode_need_encode(p) else p for p in parts]
return os.path.join(*encoded_parts)
@@ -733,7 +719,7 @@ def create_dat_file(hash_of_dirs, path, checksum_size=0):
file.write("game (\n")
for filename, (hashes, size, size_r, size_rd, timestamp) in hash_of_dir.items():
filename = encode_path_components(filename)
- data = f"name \"{filename}\" size {size} size-r {size_r} size-rd {size_rd} timestamp {timestamp}"
+ data = f"name '{filename}' size {size} size-r {size_r} size-rd {size_rd} modification-time {timestamp}"
for key, value in hashes:
data += f" {key} {value}"
Commit: f5a99e487476bc2e8575230b43c8374d03160d82
https://github.com/scummvm/scummvm-sites/commit/f5a99e487476bc2e8575230b43c8374d03160d82
Author: ShivangNagta (shivangnag at gmail.com)
Date: 2025-07-18T22:15:19+02:00
Commit Message:
INTEGRITY: Use INFORMATION_SCHEMA.COLUMNS instead of relying on error handling for column migration.
Changed paths:
schema.py
diff --git a/schema.py b/schema.py
index 4b9ba42..9bf42ee 100644
--- a/schema.py
+++ b/schema.py
@@ -164,59 +164,85 @@ def init_database():
"file_fileset_detection": "CREATE INDEX file_fileset_detection ON file (fileset, detection)",
}
- try:
- cursor.execute("ALTER TABLE file ADD COLUMN detection_type VARCHAR(20);")
- except Exception:
- # if aleady exists, change the length of the column
- cursor.execute("ALTER TABLE file MODIFY COLUMN detection_type VARCHAR(20);")
-
- try:
- cursor.execute("ALTER TABLE file ADD COLUMN `timestamp` TIMESTAMP NOT NULL;")
- except Exception:
- # if aleady exists, change the length of the column
- cursor.execute("ALTER TABLE file MODIFY COLUMN `timestamp` TIMESTAMP NOT NULL;")
-
- try:
- cursor.execute("ALTER TABLE fileset ADD COLUMN `user_count` INT;")
- except Exception:
- # if aleady exists, change the length of the column
- cursor.execute("ALTER TABLE fileset MODIFY COLUMN `user_count` INT;")
-
- try:
- cursor.execute("ALTER TABLE file ADD COLUMN punycode_name VARCHAR(200);")
- except Exception:
- cursor.execute("ALTER TABLE file MODIFY COLUMN punycode_name VARCHAR(200);")
-
- try:
- cursor.execute(
- "ALTER TABLE file ADD COLUMN encoding_type VARCHAR(20) DEFAULT 'UTF-8';"
- )
- except Exception:
- cursor.execute(
- "ALTER TABLE file MODIFY COLUMN encoding_type VARCHAR(20) DEFAULT 'UTF-8';"
- )
-
- try:
- cursor.execute(
- "ALTER TABLE file ADD COLUMN `size-r` BIGINT DEFAULT 0, ADD COLUMN `size-rd` BIGINT DEFAULT 0;"
- )
- except Exception:
- cursor.execute(
- "ALTER TABLE file MODIFY COLUMN `size-r` BIGINT DEFAULT 0, MODIFY COLUMN `size-rd` BIGINT DEFAULT 0;"
- )
- try:
- cursor.execute("ALTER TABLE log ADD COLUMN `text` varchar(5000);")
- except Exception:
- cursor.execute("ALTER TABLE log MODIFY COLUMN `text` varchar(5000);")
-
- try:
- cursor.execute(
- "ALTER TABLE fileset ADD COLUMN set_dat_metadata varchar(5000) DEFAULT 'UTF-8';"
- )
- except Exception:
- cursor.execute(
- "ALTER TABLE fileset MODIFY COLUMN set_dat_metadata varchar(5000) DEFAULT 'UTF-8';"
- )
+ def migrate_column(cursor, table_name, column_name, add_sql, modify_sql):
+ query = """
+ SELECT COUNT(*) AS count
+ FROM INFORMATION_SCHEMA.COLUMNS
+ WHERE table_name = %s AND column_name = %s
+ """
+ cursor.execute(query, (table_name, column_name))
+ exists = cursor.fetchone()["count"] > 0
+
+ if exists:
+ print(f"Modifying column '{column_name}' in table '{table_name}'")
+ cursor.execute(modify_sql)
+ else:
+ print(f"Adding column '{column_name}' to table '{table_name}'")
+ cursor.execute(add_sql)
+
+ migrate_column(
+ cursor,
+ "file",
+ "detection_type",
+ "ALTER TABLE file ADD COLUMN detection_type VARCHAR(20);",
+ "ALTER TABLE file MODIFY COLUMN detection_type VARCHAR(20);",
+ )
+
+ migrate_column(
+ cursor,
+ "file",
+ "timestamp",
+ "ALTER TABLE file ADD COLUMN `timestamp` TIMESTAMP NOT NULL;",
+ "ALTER TABLE file MODIFY COLUMN `timestamp` TIMESTAMP NOT NULL;",
+ )
+
+ migrate_column(
+ cursor,
+ "fileset",
+ "user_count",
+ "ALTER TABLE fileset ADD COLUMN `user_count` INT;",
+ "ALTER TABLE fileset MODIFY COLUMN `user_count` INT;",
+ )
+
+ migrate_column(
+ cursor,
+ "file",
+ "punycode_name",
+ "ALTER TABLE file ADD COLUMN punycode_name VARCHAR(200);",
+ "ALTER TABLE file MODIFY COLUMN punycode_name VARCHAR(200);",
+ )
+
+ migrate_column(
+ cursor,
+ "file",
+ "encoding_type",
+ "ALTER TABLE file ADD COLUMN encoding_type VARCHAR(20) DEFAULT 'UTF-8';",
+ "ALTER TABLE file MODIFY COLUMN encoding_type VARCHAR(20) DEFAULT 'UTF-8';",
+ )
+
+ migrate_column(
+ cursor,
+ "file",
+ "size-r",
+ "ALTER TABLE file ADD COLUMN `size-r` BIGINT DEFAULT 0;",
+ "ALTER TABLE file MODIFY COLUMN `size-r` BIGINT DEFAULT 0;",
+ )
+
+ migrate_column(
+ cursor,
+ "file",
+ "size-rd",
+ "ALTER TABLE file ADD COLUMN `size-rd` BIGINT DEFAULT 0;",
+ "ALTER TABLE file MODIFY COLUMN `size-rd` BIGINT DEFAULT 0;",
+ )
+
+ migrate_column(
+ cursor,
+ "log",
+ "text",
+ "ALTER TABLE log ADD COLUMN `text` VARCHAR(5000);",
+ "ALTER TABLE log MODIFY COLUMN `text` VARCHAR(5000);",
+ )
for index, definition in indices.items():
try:
Commit: 7a32cd385d10c74f779313437755d34269df1935
https://github.com/scummvm/scummvm-sites/commit/7a32cd385d10c74f779313437755d34269df1935
Author: ShivangNagta (shivangnag at gmail.com)
Date: 2025-07-18T22:15:19+02:00
Commit Message:
INTEGRITY: Add scan processing logic.
Changed paths:
db_functions.py
diff --git a/db_functions.py b/db_functions.py
index bb6a2a8..683fb63 100644
--- a/db_functions.py
+++ b/db_functions.py
@@ -902,6 +902,21 @@ def match_fileset(data_arr, username=None, skiplog=False):
user,
skiplog,
)
+ elif src == "scan":
+ scan_process(
+ game_data,
+ resources,
+ detection,
+ src,
+ conn,
+ transaction_id,
+ filepath,
+ author,
+ version,
+ source_status,
+ user,
+ skiplog,
+ )
else:
game_data_lookup = {fs["name"]: fs for fs in game_data}
for fileset in game_data:
@@ -924,6 +939,628 @@ def match_fileset(data_arr, username=None, skiplog=False):
)
+def scan_process(
+ game_data,
+ resources,
+ detection,
+ src,
+ conn,
+ transaction_id,
+ filepath,
+ author,
+ version,
+ source_status,
+ user,
+ skiplog,
+):
+ """
+ Entry point for processing logic for scan.dat.
+ First Pass - Update all files with matching checksum and file size.
+ Second Pass - Filter candidate with matching with filename, filesize and filechecksum
+ - Perform matching.
+ """
+
+ manual_merged_filesets = 0
+ automatic_merged_filesets = 0
+ match_with_full_fileset = 0
+ mismatch_with_full_fileset = 0
+ dropped_early_no_candidate = 0
+ manual_merged_with_detection = 0
+ filesets_with_missing_files = 0
+
+ id_to_fileset_mapping = defaultdict(dict)
+
+ for fileset in game_data:
+ key = calc_key(fileset)
+ megakey = ""
+ log_text = f"State {source_status}."
+
+ (fileset_id, existing) = insert_new_fileset(
+ fileset,
+ conn,
+ detection,
+ src,
+ key,
+ megakey,
+ transaction_id,
+ log_text,
+ user,
+ skiplog=skiplog,
+ )
+ if existing:
+ continue
+
+ id_to_fileset_mapping[fileset_id] = fileset
+
+ # set of filesets whose files got updated
+ filesets_check_for_full = set()
+
+ for rom in fileset["rom"]:
+ scan_update_files(rom, filesets_check_for_full, transaction_id, conn)
+
+ for fileset_id, fileset in id_to_fileset_mapping.items():
+ candidate_filesets = scan_filter_candidate_filesets(
+ fileset_id, fileset, transaction_id, conn
+ )
+
+ if len(candidate_filesets) == 0:
+ category_text = "Drop fileset - No Candidates"
+ fileset_name = fileset["name"] if "name" in fileset else ""
+ fileset_description = (
+ fileset["description"] if "description" in fileset else ""
+ )
+ log_text = f"Drop fileset as no matching candidates. Name: {fileset_name}, Description: {fileset_description}."
+ create_log(
+ escape_string(category_text), user, escape_string(log_text), conn
+ )
+ dropped_early_no_candidate += 1
+ delete_original_fileset(fileset_id, conn)
+ continue
+
+ (
+ automatic_merged_filesets,
+ manual_merged_filesets,
+ match_with_full_fileset,
+ mismatch_with_full_fileset,
+ manual_merged_with_detection,
+ filesets_with_missing_files,
+ ) = scan_perform_match(
+ fileset,
+ src,
+ user,
+ fileset_id,
+ detection,
+ candidate_filesets,
+ automatic_merged_filesets,
+ manual_merged_filesets,
+ match_with_full_fileset,
+ mismatch_with_full_fileset,
+ manual_merged_with_detection,
+ filesets_with_missing_files,
+ conn,
+ skiplog,
+ )
+
+ # Final log
+ with conn.cursor() as cursor:
+ cursor.execute(
+ "SELECT COUNT(fileset) from transactions WHERE `transaction` = %s",
+ (transaction_id,),
+ )
+ fileset_insertion_count = cursor.fetchone()["COUNT(fileset)"]
+ category_text = f"Uploaded from {src}"
+ log_text = f"Completed loading DAT file, filename {filepath}, size {os.path.getsize(filepath)}. State {source_status}. Number of filesets: {fileset_insertion_count}. Transaction: {transaction_id}"
+ create_log(escape_string(category_text), user, escape_string(log_text), conn)
+ category_text = "Upload information"
+ log_text = f"Number of filesets: {fileset_insertion_count}. Filesets automatically merged: {automatic_merged_filesets}. Filesets requiring manual merge (multiple candidates): {manual_merged_filesets}. Filesets requiring manual merge (matched with detection): {manual_merged_with_detection}. Filesets dropped, no candidate: {dropped_early_no_candidate}. Filesets matched with existing Full fileset: {match_with_full_fileset}. Filesets with mismatched files with Full fileset: {mismatch_with_full_fileset}. Filesets missing files compared to partial fileset candidate: {filesets_with_missing_files}."
+ create_log(escape_string(category_text), user, escape_string(log_text), conn)
+
+
+def scan_update_files(rom, filesets_check_for_full, transaction_id, conn):
+ """
+ Updates all the checksums for the files matching by a checksum and size.
+ """
+ with conn.cursor() as cursor:
+ checksums = defaultdict(str)
+ for key in rom:
+ if key not in ["name", "size", "size-r", "size-rd", "modification-time"]:
+ checksums[key] = rom[key]
+
+ files_to_update = set()
+
+ for _, checksum in checksums.items():
+ query = """
+ SELECT f.id as file_id, fs.id as fileset_id
+ FROM file f
+ JOIN filechecksum fc ON fc.file = f.id
+ JOIN fileset fs ON fs.id = f.fileset
+ JOIN transactions t ON t.fileset = fs.id
+ WHERE fc.checksum = %s
+ AND f.size = %s
+ AND f.`size-r` = %s
+ AND f.`size-rd` = %s
+ AND t.transaction != %s
+ """
+ size = rom["size"] if "size" in rom else 0
+ size_r = rom["size-r"] if "size-r" in rom else 0
+ size_rd = rom["size-rd"] if "size-rd" in rom else 0
+ cursor.execute(query, (checksum, size, size_r, size_rd, transaction_id))
+ result = cursor.fetchall()
+ if result:
+ for file in result:
+ filesets_check_for_full.add(file["fileset_id"])
+ files_to_update.add(file["file_id"])
+
+ for file_id in files_to_update:
+ query = """
+ DELETE FROM filechecksum
+ WHERE file = %s
+ """
+ cursor.execute(query, (file_id,))
+ for check, checksum in checksums.items():
+ checksize, checktype, checksum = get_checksum_props(check, checksum)
+ query = "INSERT INTO filechecksum (file, checksize, checktype, checksum) VALUES (%s, %s, %s, %s)"
+ cursor.execute(query, (file_id, checksize, checktype, checksum))
+
+ conn.commit()
+
+
+def scan_perform_match(
+ fileset,
+ src,
+ user,
+ fileset_id,
+ detection,
+ candidate_filesets,
+ automatic_merged_filesets,
+ manual_merged_filesets,
+ match_with_full_fileset,
+ mismatch_with_full_fileset,
+ manual_merged_with_detection,
+ filesets_with_missing_files,
+ conn,
+ skiplog,
+):
+ """
+ Performs matching for scan.dat.
+ If single candidate for match:
+ detection -> Copy all the files and checksums from scan.
+ partial -> Copy all the files and checksums from scan.
+ full -> Drop the scan fileset. But show the differences in file if any.
+ If more than one candidate for match:
+ Put them for manual merge.
+ """
+ with conn.cursor() as cursor:
+ if len(candidate_filesets) == 1:
+ matched_fileset_id = candidate_filesets[0]
+ cursor.execute(
+ "SELECT status FROM fileset WHERE id = %s", (matched_fileset_id,)
+ )
+ status = cursor.fetchone()["status"]
+ # Partial filesets can be turned full directly, as the files have already been updated.
+ # But the files that had missing size were not updated, so we need to check.
+ if status == "partial":
+ # Partial filesets contain all the files, as do the scanned filesets, so this case should ideally not happen.
+ if total_files(matched_fileset_id, conn) > total_fileset_files(fileset):
+ category_text = "Missing files"
+ log_text = f"Missing files in Fileset:{fileset_id}. Try manual merge with Fileset:{matched_fileset_id}."
+ add_manual_merge(
+ candidate_filesets,
+ fileset_id,
+ category_text,
+ log_text,
+ user,
+ conn,
+ log_text,
+ )
+ filesets_with_missing_files += 1
+
+ else:
+ update_all_files(fileset, matched_fileset_id, False, conn)
+ update_fileset_status(cursor, matched_fileset_id, "full")
+ if not skiplog:
+ log_matched_fileset(
+ src,
+ fileset_id,
+ matched_fileset_id,
+ "full",
+ user,
+ conn,
+ )
+ delete_original_fileset(fileset_id, conn)
+ automatic_merged_filesets += 1
+
+ # Detection filesets can be turned full if the number of files is equal,
+ # otherwise we do a manual merge to remove extra files.
+ elif status == "detection":
+ if total_fileset_files(fileset) == total_files(
+ matched_fileset_id, conn, detection_only=True
+ ):
+ update_all_files(fileset, matched_fileset_id, True, conn)
+ update_fileset_status(cursor, matched_fileset_id, "full")
+ if not skiplog:
+ log_matched_fileset(
+ src,
+ fileset_id,
+ matched_fileset_id,
+ "full",
+ user,
+ conn,
+ )
+ delete_original_fileset(fileset_id, conn)
+ automatic_merged_filesets += 1
+
+ else:
+ category_text = "Manual Merge - Detection found"
+ log_text = f"Matched with detection. Merge Fileset:{fileset_id} manually with Fileset:{matched_fileset_id}."
+ add_manual_merge(
+ candidate_filesets,
+ fileset_id,
+ category_text,
+ log_text,
+ user,
+ conn,
+ log_text,
+ )
+ manual_merged_with_detection += 1
+
+ # Drop the fileset, note down the file differences
+ elif status == "full":
+ (unmatched_candidate_files, unmatched_scan_files) = get_unmatched_files(
+ matched_fileset_id, fileset, conn
+ )
+ fully_matched = (
+ True
+ if len(unmatched_candidate_files) == 0
+ and len(unmatched_scan_files) == 0
+ else False
+ )
+ if fully_matched:
+ match_with_full_fileset += 1
+ else:
+ mismatch_with_full_fileset += 1
+ log_scan_match_with_full(
+ fileset_id,
+ matched_fileset_id,
+ unmatched_candidate_files,
+ unmatched_scan_files,
+ fully_matched,
+ user,
+ conn,
+ )
+ delete_original_fileset(fileset_id, conn)
+
+ elif len(candidate_filesets) > 1:
+ category_text = "Manual Merge - Multiple Candidates"
+ log_text = f"Merge Fileset:{fileset_id} manually. Possible matches are: {', '.join(f'Fileset:{id}' for id in candidate_filesets)}."
+ manual_merged_filesets += 1
+ add_manual_merge(
+ candidate_filesets,
+ fileset_id,
+ category_text,
+ log_text,
+ user,
+ conn,
+ log_text,
+ )
+
+ return (
+ automatic_merged_filesets,
+ manual_merged_filesets,
+ match_with_full_fileset,
+ mismatch_with_full_fileset,
+ manual_merged_with_detection,
+ filesets_with_missing_files,
+ )
+
+
+def update_all_files(fileset, candidate_fileset_id, is_candidate_detection, conn):
+ """
+ Updates all the files, in case any were missed earlier due to missing size.
+ """
+ with conn.cursor() as cursor:
+ # Extracting the filename from the filepath.
+ cursor.execute(
+ f"SELECT id, REGEXP_REPLACE(name, '^.*[\\\\/]', '') AS name, size FROM file WHERE fileset = {candidate_fileset_id}"
+ )
+ target_files = cursor.fetchall()
+ candidate_files = {
+ target_file["id"]: target_file["name"].lower()
+ for target_file in target_files
+ }
+
+ scan_checksums = set()
+ scan_names_by_checksum = defaultdict(str)
+ same_filename_count = defaultdict(int)
+
+ filename_to_filepath_map = defaultdict(str)
+ filepath_to_checksum_map = defaultdict(dict)
+ filepath_to_sizes_map = defaultdict(dict)
+
+ for file in fileset["rom"]:
+ base_name = os.path.basename(normalised_path(file["name"])).lower()
+ checksums = defaultdict(str)
+ sizes = defaultdict(int)
+ for key in file:
+ if key.startswith("md5"):
+ scan_checksums.add((file[key], base_name))
+ scan_names_by_checksum[(file[key], base_name)] = file["name"]
+ checksums[key] = file[key]
+ if key.startswith("size"):
+ sizes[key] = file[key]
+
+ filepath_to_sizes_map[file["name"]] = sizes
+ filepath_to_checksum_map[file["name"]] = checksums
+ same_filename_count[base_name] += 1
+ filename_to_filepath_map[base_name] = file["name"]
+
+ checksums = defaultdict(dict)
+ filepath = ""
+
+ for file_id, file_name in candidate_files.items():
+ file_name = file_name.lower()
+ # Match by filename
+ if same_filename_count[file_name] == 1:
+ filepath = filename_to_filepath_map[file_name]
+ checksums = filepath_to_checksum_map[filepath]
+
+ # If the same filename occurs multiple times, fall back to a checksum-based match
+ else:
+ cursor.execute(
+ "SELECT checksum FROM filechecksum WHERE file = %s", (file_id,)
+ )
+ checksum_rows = cursor.fetchall()
+ for row in checksum_rows:
+ checksum = row["checksum"]
+ if (checksum, file_name) in scan_checksums:
+ filepath = scan_names_by_checksum[(checksum, file_name)]
+ checksums = filepath_to_checksum_map[filepath]
+
+ # Delete older checksums
+ query = """
+ DELETE FROM filechecksum
+ WHERE file = %s
+ """
+ cursor.execute(query, (file_id,))
+ # Update the checksums
+ for key, checksum in checksums.items():
+ checksize, checktype, checksum = get_checksum_props(key, checksum)
+ query = "INSERT INTO filechecksum (file, checksize, checktype, checksum) VALUES (%s, %s, %s, %s)"
+ cursor.execute(query, (file_id, checksize, checktype, checksum))
+
+ # Also update the sizes; do not update the name if the fileset is not in the detection state
+ query = """
+ UPDATE file
+ SET size = %s,
+ `size-r` = %s,
+ `size-rd` = %s
+ """
+ sizes = filepath_to_sizes_map[filepath]
+ print(sizes)
+ if is_candidate_detection:
+ query += ",name = %s WHERE id = %s"
+ params = (
+ sizes["size"],
+ sizes["size-r"],
+ sizes["size-rd"],
+ normalised_path(filepath),
+ file_id,
+ )
+ else:
+ query += "WHERE id = %s"
+ params = (sizes["size"], sizes["size-r"], sizes["size-rd"], file_id)
+ cursor.execute(query, params)
+
+
+def total_files(fileset_id, conn, detection_only=False):
+ """
+ Returns the total number of files (only detection files if detection_only is True) present in the given fileset, as stored in the database.
+ """
+ with conn.cursor() as cursor:
+ query = """
+ SELECT COUNT(*) AS count
+ FROM file f
+ JOIN fileset fs ON fs.id = f.fileset
+ """
+ if detection_only:
+ query += """
+ WHERE f.detection = 1
+ AND fs.id = %s
+ """
+ else:
+ query += "WHERE fs.id = %s"
+ cursor.execute(query, (fileset_id,))
+ return cursor.fetchone()["count"]
+
+
+def total_fileset_files(fileset):
+ """
+ Returns the number of files present in the fileset
+ """
+ return len(fileset["rom"])
+
+
+def scan_filter_candidate_filesets(fileset_id, fileset, transaction_id, conn):
+ """
+ Returns a list of candidate filesets that can be merged
+ """
+ with conn.cursor() as cursor:
+ # Returns those filesets which have all detection files matching in the scan fileset filtered by file name and file size(if not -1).
+
+ query = """
+ WITH candidate_fileset AS (
+ SELECT fs.id AS fileset_id, f.name, f.size,
+ f.`size-r` AS size_r, f.`size-rd` AS size_rd
+ FROM file f
+ JOIN fileset fs ON f.fileset = fs.id
+ JOIN game g ON g.id = fs.game
+ JOIN transactions t ON t.fileset = fs.id
+ WHERE f.detection = 1
+ AND t.transaction != %s
+ ),
+ total_detection_files AS (
+ SELECT cf.fileset_id, COUNT(*) AS detection_files_found
+ FROM candidate_fileset cf
+ GROUP BY fileset_id
+ ),
+ set_fileset AS (
+ SELECT name, size,
+ `size-r` AS size_r, `size-rd` AS size_rd
+ FROM file
+ WHERE fileset = %s
+ ),
+ matched_detection_files AS (
+ SELECT cf.fileset_id, COUNT(*) AS match_files_count
+ FROM candidate_fileset cf
+ JOIN set_fileset sf ON ( (
+ cf.name = sf.name
+ OR
+ REGEXP_REPLACE(cf.name, '^.*[\\\\/]', '') = REGEXP_REPLACE(sf.name, '^.*[\\\\/]', '')
+ ) AND (cf.size = sf.size OR cf.size = -1)
+ AND (cf.size_r = sf.size_r)
+ AND (cf.size_rd = sf.size_rd))
+ GROUP BY cf.fileset_id
+ ),
+ valid_matched_detection_files AS (
+ SELECT mdf.fileset_id, mdf.match_files_count AS valid_match_files_count
+ FROM matched_detection_files mdf
+ JOIN total_detection_files tdf ON tdf.fileset_id = mdf.fileset_id
+ WHERE tdf.detection_files_found <= mdf.match_files_count
+ ),
+ max_match_count AS (
+ SELECT MAX(valid_match_files_count) AS max_count FROM valid_matched_detection_files
+ )
+ SELECT vmdf.fileset_id
+ FROM valid_matched_detection_files vmdf
+ JOIN total_detection_files tdf ON vmdf.fileset_id = tdf.fileset_id
+ JOIN max_match_count mmc ON vmdf.valid_match_files_count = mmc.max_count
+ """
+
+ cursor.execute(query, (transaction_id, fileset_id))
+ rows = cursor.fetchall()
+
+ candidates = []
+ if rows:
+ for row in rows:
+ candidates.append(row["fileset_id"])
+
+ for candidate in candidates:
+ if not is_full_detection_checksum_match(candidate, fileset, conn):
+ candidates.remove(candidate)
+
+ return candidates
+
+
+def get_unmatched_files(candidate_fileset, fileset, conn):
+ """
+ Checks if all checksums from candidate_fileset match scan file checksums.
+ Returns:
+ unmatched_candidate_files: candidate files whose checksums weren't found in scan
+ unmatched_scan_files: scan files whose checksums weren't matched by candidate
+ """
+ with conn.cursor() as cursor:
+ cursor.execute(
+ "SELECT id, name FROM file WHERE fileset = %s", (candidate_fileset,)
+ )
+ candidate_file_rows = cursor.fetchall()
+ candidate_files = {row["id"]: row["name"] for row in candidate_file_rows}
+
+ scan_checksums = set()
+ scan_names_by_checksum = {}
+
+ for file in fileset["rom"]:
+ base_name = os.path.basename(normalised_path(file["name"])).lower()
+ for key in file:
+ if key.startswith("md5"):
+ scan_checksums.add((file[key], base_name))
+ scan_names_by_checksum[(file[key], base_name)] = file["name"]
+
+ unmatched_candidate_files = []
+ matched_scan_pairs = set()
+
+ for file_id, file_name in candidate_files.items():
+ cursor.execute(
+ "SELECT checksum FROM filechecksum WHERE file = %s", (file_id,)
+ )
+ checksum_rows = cursor.fetchall()
+
+ base_name = os.path.basename(file_name).lower()
+ match_found = False
+
+ for row in checksum_rows:
+ checksum = row["checksum"]
+ if (checksum, base_name) in scan_checksums:
+ matched_scan_pairs.add((checksum, base_name))
+ match_found = True
+
+ if not match_found:
+ unmatched_candidate_files.append(file_name)
+
+ unmatched_scan_files = {
+ scan_names_by_checksum[key]
+ for key in scan_checksums
+ if key not in matched_scan_pairs
+ }
+ unmatched_scan_files = list(unmatched_scan_files)
+
+ return (unmatched_candidate_files, unmatched_scan_files)
+
+
+def is_full_detection_checksum_match(candidate_fileset, fileset, conn):
+ """
+ Return type - Boolean
+ Checks if all the detection files in the candidate fileset have corresponding checksums matching the scan.
+
+ scan - rom ( name "AFM Read Me!_2" size 8576 size-r 1 size-rd 0 modification-time 1993-05-12 md5 dsd16ccea050db521a678a1cdc33794c md5-5000 008e76ec3ae58d0add637ea7aa299a2a md5-t-5000 118e76ec3ae58d0add637ea7aa299a2c md5-1048576 37d16ccea050db521a678a1cdc33794c)
+ """
+ with conn.cursor() as cursor:
+ cursor.execute(
+ "SELECT id, name FROM file WHERE detection=1 AND fileset = %s",
+ (candidate_fileset,),
+ )
+ target_files = cursor.fetchall()
+ candidate_files = {
+ target_file["id"]: target_file["name"] for target_file in target_files
+ }
+
+ # set of (checksum, filename)
+ scan_checksums = set()
+ for file in fileset["rom"]:
+ for key in file:
+ if key.startswith("md5"):
+ name = os.path.basename(normalised_path(file["name"]))
+ scan_checksums.add((file[key], name.lower()))
+
+ for detection_file_id, detection_file_name in candidate_files.items():
+ query = """
+ SELECT fc.checksum, fc.checksize, fc.checktype
+ FROM filechecksum fc
+ WHERE fc.file = %s
+ """
+ cursor.execute(query, (detection_file_id,))
+ checksums_info = cursor.fetchall()
+ match_found = False
+ if checksums_info:
+ for checksum_info in checksums_info:
+ checksum = checksum_info["checksum"]
+ if (
+ checksum,
+ os.path.basename(detection_file_name.lower()),
+ ) not in scan_checksums:
+ match_found = True
+ break
+
+ if match_found:
+ return False
+
+ return True
+
+
+# -------------------------------------------------------------------------------------------------------
+# Set.dat processing below
+# -------------------------------------------------------------------------------------------------------
+
+
def set_process(
game_data,
resources,
@@ -2085,6 +2722,27 @@ def log_matched_fileset(src, fileset_last, fileset_id, state, user, conn):
update_history(fileset_last, fileset_id, conn, log_last)
+def log_scan_match_with_full(
+ fileset_last,
+ candidate_id,
+ unmatched_candidate_files,
+ unmatched_scan_files,
+ fully_matched,
+ user,
+ conn,
+):
+ category_text = "Mismatch with Full set"
+ if fully_matched:
+ category_text = "Existing as Full set."
+ log_text = f"""Files mismatched with Full Fileset:{candidate_id}. Unmatched Files in scan fileset = {len(unmatched_scan_files)}. Unmatched Files in full fileset = {len(unmatched_candidate_files)}. List of unmatched files scan.dat : {", ".join(scan_file for scan_file in unmatched_scan_files)}, List of unmatched files full fileset : {", ".join(scan_file for scan_file in unmatched_candidate_files)}"""
+ if fully_matched:
+ log_text = (
+ f"Fileset matched completely with Full Fileset:{candidate_id}. Dropping."
+ )
+ print(log_text)
+ create_log(escape_string(category_text), user, escape_string(log_text), conn)
+
+
def finalize_fileset_insertion(
conn, transaction_id, src, filepath, author, version, source_status, user
):
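The decision tree implemented by scan_perform_match above condenses into a small standalone sketch (the function and its dict parameters below are illustrative only; the database lookups are stubbed out as plain arguments):

def match_scan_fileset(candidate_ids, status_of, db_file_count, scan_file_count):
    """Illustrative sketch of the scan.dat matching decision tree.

    candidate_ids   -- list of candidate fileset ids
    status_of       -- dict: fileset id -> 'partial' | 'detection' | 'full'
    db_file_count   -- dict: fileset id -> number of files stored for that fileset
    scan_file_count -- number of files in the scanned fileset
    """
    if len(candidate_ids) == 1:
        cid = candidate_ids[0]
        status = status_of[cid]
        if status == "partial":
            # More files in the database than in the scan -> manual merge.
            if db_file_count[cid] > scan_file_count:
                return "manual_merge_missing_files"
            return "merge_and_mark_full"
        if status == "detection":
            # Equal file counts -> promote to full, otherwise manual merge.
            if scan_file_count == db_file_count[cid]:
                return "merge_and_mark_full"
            return "manual_merge_with_detection"
        if status == "full":
            # Already full: drop the scan fileset, logging any file differences.
            return "drop_and_log_differences"
    elif len(candidate_ids) > 1:
        return "manual_merge_multiple_candidates"
    return "no_candidate"

# Example: a single detection candidate with the same number of files.
print(match_scan_fileset([7], {7: "detection"}, {7: 12}, 12))  # merge_and_mark_full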
Commit: 18deb0da65d922f33e5c75cc47fdfc8d6299e3cd
https://github.com/scummvm/scummvm-sites/commit/18deb0da65d922f33e5c75cc47fdfc8d6299e3cd
Author: ShivangNagta (shivangnag at gmail.com)
Date: 2025-07-18T22:15:19+02:00
Commit Message:
INTEGRITY: Add additional modification-time column in file table.
Changed paths:
dat_parser.py
db_functions.py
fileset.py
schema.py
diff --git a/dat_parser.py b/dat_parser.py
index b3ce12e..a76480b 100644
--- a/dat_parser.py
+++ b/dat_parser.py
@@ -33,6 +33,9 @@ def map_checksum_data(content_string):
elif tokens[i] == "size-rd":
current_rom["size-rd"] = int(tokens[i + 1])
i += 2
+ elif tokens[i] == "modification-time":
+ current_rom["modification-time"] = tokens[i + 1]
+ i += 2
else:
checksum_key = tokens[i]
checksum_value = tokens[i + 1] if len(tokens) >= 6 else "0"
diff --git a/db_functions.py b/db_functions.py
index 683fb63..408ea29 100644
--- a/db_functions.py
+++ b/db_functions.py
@@ -234,13 +234,16 @@ def insert_file(file, detection, src, conn):
values.append(file["size-r"] if "size-r" in file else "0")
values.append(file["size-rd"] if "size-rd" in file else "0")
+ modification_time = file["modification-time"] if "modification-time" in file else ""
+ values.append(modification_time)
+
values.extend([checksum, detection, detection_type])
# Parameterised Query
placeholders = (
- ["%s"] * (len(values[:5])) + ["@fileset_last"] + ["%s"] * 2 + ["NOW()"]
+ ["%s"] * (len(values[:6])) + ["@fileset_last"] + ["%s"] * 2 + ["NOW()"]
)
- query = f"INSERT INTO file ( name, size, `size-r`, `size-rd`, checksum, fileset, detection, detection_type, `timestamp` ) VALUES ({', '.join(placeholders)})"
+ query = f"INSERT INTO file ( name, size, `size-r`, `size-rd`, `modification-time`, checksum, fileset, detection, detection_type, `timestamp` ) VALUES ({', '.join(placeholders)})"
with conn.cursor() as cursor:
cursor.execute(query, values)
diff --git a/fileset.py b/fileset.py
index c8e9cf7..a930f54 100644
--- a/fileset.py
+++ b/fileset.py
@@ -189,7 +189,6 @@ def fileset():
query = """SELECT game.name as 'game name', engineid, gameid, extra, platform, language, fileset.set_dat_metadata FROM fileset JOIN game ON game.id = fileset.game JOIN engine ON engine.id = game.engine WHERE fileset.id = %s"""
else:
query = """SELECT game.name as 'game name', engineid, gameid, extra, platform, language FROM fileset JOIN game ON game.id = fileset.game JOIN engine ON engine.id = game.engine WHERE fileset.id = %s"""
- print(query)
cursor.execute(query, (id,))
result = {**result, **cursor.fetchone()}
else:
@@ -240,6 +239,7 @@ def fileset():
"detection",
"detection_type",
"timestamp",
+ "modification-time",
]
if sort:
@@ -250,13 +250,10 @@ def fileset():
if "desc" in sort:
order += " DESC"
- columns_to_select = "file.id, name, size, `size-r`, `size-rd`, checksum, detection, detection_type, `timestamp`"
+ columns_to_select = "file.id, name, size, `size-r`, `size-rd`, checksum, detection, detection_type, `timestamp`, `modification-time`"
columns_to_select += ", ".join(md5_columns)
- print(
- f"SELECT file.id, name, size, `size-r`, `size-rd`, checksum, detection, detection_type, `timestamp` FROM file WHERE fileset = {id} {order}"
- )
cursor.execute(
- f"SELECT file.id, name, size, `size-r`, `size-rd`, checksum, detection, detection_type, `timestamp` FROM file WHERE fileset = {id} {order}"
+ f"SELECT file.id, name, size, `size-r`, `size-rd`, checksum, detection, detection_type, `timestamp`, `modification-time` FROM file WHERE fileset = {id} {order}"
)
result = cursor.fetchall()
diff --git a/schema.py b/schema.py
index 9bf42ee..776eada 100644
--- a/schema.py
+++ b/schema.py
@@ -244,6 +244,22 @@ def init_database():
"ALTER TABLE log MODIFY COLUMN `text` VARCHAR(5000);",
)
+ migrate_column(
+ cursor,
+ "fileset",
+ "set_dat_metadata",
+ "ALTER TABLE fileset ADD COLUMN set_dat_metadata VARCHAR(5000) DEFAULT '';",
+ "ALTER TABLE fileset MODIFY COLUMN set_dat_metadata VARCHAR(5000) DEFAULT '';",
+ )
+
+ migrate_column(
+ cursor,
+ "file",
+ "modification-time",
+ "ALTER TABLE file ADD COLUMN `modification-time` VARCHAR(100) DEFAULT '';",
+ "ALTER TABLE file MODIFY COLUMN `modification-time` VARCHAR(100) DEFAULT '';",
+ )
+
for index, definition in indices.items():
try:
cursor.execute(definition)
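A rom entry carrying the new modification-time field simply gains one more string entry in the per-rom dict. A simplified illustration of the token mapping (this is not the real map_checksum_data, which also handles unnamed checksum values), using the example rom from the docstring earlier in this email:

def map_rom_tokens(tokens):
    # Walk the tokens pairwise and build the rom dict.
    rom = {}
    i = 0
    while i < len(tokens) - 1:
        key, value = tokens[i], tokens[i + 1]
        if key in ("size", "size-r", "size-rd"):
            rom[key] = int(value)
        elif key == "modification-time":
            rom[key] = value              # kept as a string, e.g. "1993-05-12"
        else:                             # name or a checksum key such as md5-5000
            rom[key] = value
        i += 2
    return rom

tokens = ["name", "AFM Read Me!_2", "size", "8576", "size-r", "1",
          "size-rd", "0", "modification-time", "1993-05-12",
          "md5-5000", "008e76ec3ae58d0add637ea7aa299a2a"]
print(map_rom_tokens(tokens)["modification-time"])  # 1993-05-12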
Commit: d27e751324233ca6c0cfa3cda9863a00b1daf75b
https://github.com/scummvm/scummvm-sites/commit/d27e751324233ca6c0cfa3cda9863a00b1daf75b
Author: ShivangNagta (shivangnag at gmail.com)
Date: 2025-07-18T22:15:19+02:00
Commit Message:
INTEGRITY: Additional error handling while extracting keys from scummvm.dat.
Changed paths:
db_functions.py
diff --git a/db_functions.py b/db_functions.py
index 408ea29..f9db3bc 100644
--- a/db_functions.py
+++ b/db_functions.py
@@ -546,13 +546,19 @@ def db_insert(data_arr, username=None, skiplog=False):
megakey = calc_megakey(fileset)
if detection:
- engine_name = fileset["engine"]
- engineid = fileset["sourcefile"]
- gameid = fileset["name"]
- title = fileset["title"]
- extra = fileset["extra"]
- platform = fileset["platform"]
- lang = fileset["language"]
+ try:
+ engine_name = fileset.get("engine", "")
+ engineid = fileset["sourcefile"]
+ gameid = fileset["name"]
+ title = fileset.get("title", "")
+ extra = fileset.get("extra", "")
+ platform = fileset.get("platform", "")
+ lang = fileset.get("language", "")
+ except KeyError as e:
+ print(
+ f"Missing key in header: {e} for {fileset.get('name', '')}-{fileset.get('language', '')}-{fileset.get('platform', '')}"
+ )
+ return
with conn.cursor() as cursor:
query = """
Commit: db8ab76b5c0427ab5e342a664bb78faf2c6fa843
https://github.com/scummvm/scummvm-sites/commit/db8ab76b5c0427ab5e342a664bb78faf2c6fa843
Author: ShivangNagta (shivangnag at gmail.com)
Date: 2025-07-18T22:15:19+02:00
Commit Message:
INTEGRITY: Traverse set.dat instead of candidate fileset while searching mismatched files.
Changed paths:
db_functions.py
diff --git a/db_functions.py b/db_functions.py
index f9db3bc..6906a64 100644
--- a/db_functions.py
+++ b/db_functions.py
@@ -281,8 +281,6 @@ def add_all_equal_checksums(checksize, checktype, checksum, file_id, conn):
size_name = "size"
if checktype[-1] == "r":
size_name += "-rd"
- if checktype[-1] == "s":
- size_name += "-d"
cursor.execute(f"SELECT `{size_name}` FROM file WHERE id = {file_id}")
result = cursor.fetchone()
@@ -1345,7 +1343,6 @@ def update_all_files(fileset, candidate_fileset_id, is_candidate_detection, conn
`size-rd` = %s
"""
sizes = filepath_to_sizes_map[filepath]
- print(sizes)
if is_candidate_detection:
query += ",name = %s WHERE id = %s"
params = (
@@ -1462,10 +1459,10 @@ def scan_filter_candidate_filesets(fileset_id, fileset, transaction_id, conn):
def get_unmatched_files(candidate_fileset, fileset, conn):
"""
- Checks if all checksums from candidate_fileset match scan file checksums.
+ Checks if all checksums from candidate_fileset match dat file checksums.
Returns:
unmatched_candidate_files: candidate files whose checksums weren't found in scan
- unmatched_scan_files: scan files whose checksums weren't matched by candidate
+ unmatched_dat_files: dat files whose checksums weren't matched by candidate
"""
with conn.cursor() as cursor:
cursor.execute(
@@ -1474,18 +1471,18 @@ def get_unmatched_files(candidate_fileset, fileset, conn):
candidate_file_rows = cursor.fetchall()
candidate_files = {row["id"]: row["name"] for row in candidate_file_rows}
- scan_checksums = set()
- scan_names_by_checksum = {}
+ dat_checksums = set()
+ dat_names_by_checksum = {}
for file in fileset["rom"]:
base_name = os.path.basename(normalised_path(file["name"])).lower()
for key in file:
if key.startswith("md5"):
- scan_checksums.add((file[key], base_name))
- scan_names_by_checksum[(file[key], base_name)] = file["name"]
+ dat_checksums.add((file[key], base_name))
+ dat_names_by_checksum[(file[key], base_name)] = file["name"]
unmatched_candidate_files = []
- matched_scan_pairs = set()
+ matched_dat_pairs = set()
for file_id, file_name in candidate_files.items():
cursor.execute(
@@ -1498,21 +1495,21 @@ def get_unmatched_files(candidate_fileset, fileset, conn):
for row in checksum_rows:
checksum = row["checksum"]
- if (checksum, base_name) in scan_checksums:
- matched_scan_pairs.add((checksum, base_name))
+ if (checksum, base_name) in dat_checksums:
+ matched_dat_pairs.add((checksum, base_name))
match_found = True
if not match_found:
unmatched_candidate_files.append(file_name)
- unmatched_scan_files = {
- scan_names_by_checksum[key]
- for key in scan_checksums
- if key not in matched_scan_pairs
+ unmatched_dat_files = {
+ dat_names_by_checksum[key]
+ for key in dat_checksums
+ if key not in matched_dat_pairs
}
- unmatched_scan_files = list(unmatched_scan_files)
+ unmatched_dat_files = list(unmatched_dat_files)
- return (unmatched_candidate_files, unmatched_scan_files)
+ return (unmatched_candidate_files, unmatched_dat_files)
def is_full_detection_checksum_match(candidate_fileset, fileset, conn):
@@ -1524,7 +1521,7 @@ def is_full_detection_checksum_match(candidate_fileset, fileset, conn):
"""
with conn.cursor() as cursor:
cursor.execute(
- "SELECT id, name FROM file WHERE detection=1 AND fileset = %s",
+ "SELECT id, REGEXP_REPLACE(name, '^.*[\\\\/]', '') AS name FROM file WHERE detection=1 AND fileset = %s",
(candidate_fileset,),
)
target_files = cursor.fetchall()
@@ -1682,7 +1679,7 @@ def set_process(
console_message = "Candidate filtering finished."
console_log(console_message)
console_message = (
- f"{dropped_early_no_candidate} Filesets Dropped - No candidates found."
+ f"{dropped_early_no_candidate} Filesets Dropped for No candidates."
)
console_log(console_message)
console_message = "Looking for duplicates..."
@@ -1872,9 +1869,15 @@ def set_perform_match(
matched_fileset_id, manual_merge_map, set_to_candidate_dict, conn
)
elif status == "partial" or status == "full":
- (is_match, unmatched_files) = is_full_checksum_match(
+ (unmatched_candidate_files, unmatched_dat_files) = get_unmatched_files(
matched_fileset_id, fileset, conn
)
+ is_match = (
+ True
+ if len(unmatched_candidate_files) == 0
+ and len(unmatched_dat_files) == 0
+ else False
+ )
if is_match:
category_text = "Already present"
log_text = f"Already present as - Fileset:{matched_fileset_id}. Deleting Fileset:{fileset_id}"
@@ -1890,7 +1893,8 @@ def set_perform_match(
else:
category_text = "Mismatch"
- log_text = f"Fileset:{fileset_id} mismatched with Fileset:{matched_fileset_id} with status:{status}. Try manual merge."
+ log_text = f"Fileset:{fileset_id} mismatched with Fileset:{matched_fileset_id} with status:{status}. Try manual merge. Unmatched Files in set.dat fileset = {len(unmatched_dat_files)} Unmatched Files in candidate fileset = {len(unmatched_candidate_files)}. List of unmatched files scan.dat : {', '.join(scan_file for scan_file in unmatched_dat_files)}, List of unmatched files full fileset : {', '.join(scan_file for scan_file in unmatched_candidate_files)}"
+ console_log(log_text)
# print_text = f"Merge Fileset:{fileset_id} manually with Fileset:{matched_fileset_id}. Unmatched files: {len(unmatched_files)}."
mismatch_filesets += 1
add_manual_merge(
@@ -1904,7 +1908,6 @@ def set_perform_match(
elif len(candidate_filesets) > 1:
found_match = False
-
for candidate_fileset in candidate_filesets:
(is_match, _) = is_full_checksum_match(candidate_fileset, fileset, conn)
if is_match:
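The mismatch reporting above reduces to set membership over (checksum, basename) pairs on both sides. A self-contained sketch with in-memory data instead of database rows (names and structure here are illustrative):

def unmatched_files(candidate_files, dat_files):
    """candidate_files: dict name -> set of checksums (database side)
    dat_files: dict name -> set of checksums (set.dat / scan.dat side)"""
    dat_pairs = {(c, name.lower()) for name, sums in dat_files.items() for c in sums}
    matched_pairs = set()
    unmatched_candidate = []
    for name, sums in candidate_files.items():
        hits = {(c, name.lower()) for c in sums} & dat_pairs
        if hits:
            matched_pairs |= hits
        else:
            unmatched_candidate.append(name)
    unmatched_dat = [name for name, sums in dat_files.items()
                     if not any((c, name.lower()) in matched_pairs for c in sums)]
    return unmatched_candidate, unmatched_dat

print(unmatched_files({"a.bin": {"x1"}, "b.bin": {"y2"}},
                      {"a.bin": {"x1"}, "c.bin": {"z3"}}))  # (['b.bin'], ['c.bin'])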
Commit: a6834242059b6d4e35b6b1ca76c12148b5c6216d
https://github.com/scummvm/scummvm-sites/commit/a6834242059b6d4e35b6b1ca76c12148b5c6216d
Author: ShivangNagta (shivangnag at gmail.com)
Date: 2025-07-18T22:15:19+02:00
Commit Message:
INTEGRITY: Add checksum based filtering in set.dat, when possible.
Changed paths:
db_functions.py
diff --git a/db_functions.py b/db_functions.py
index 6906a64..3a4fc12 100644
--- a/db_functions.py
+++ b/db_functions.py
@@ -977,7 +977,9 @@ def scan_process(
id_to_fileset_mapping = defaultdict(dict)
+ fileset_count = 0
for fileset in game_data:
+ console_log_file_update(fileset_count)
key = calc_key(fileset)
megakey = ""
log_text = f"State {source_status}."
@@ -1003,9 +1005,12 @@ def scan_process(
filesets_check_for_full = set()
for rom in fileset["rom"]:
- scan_update_files(rom, filesets_check_for_full, transaction_id, conn)
+ pre_update_files(rom, filesets_check_for_full, transaction_id, conn)
+ fileset_count += 1
+ fileset_count = 0
for fileset_id, fileset in id_to_fileset_mapping.items():
+ console_log_matching(fileset_count)
candidate_filesets = scan_filter_candidate_filesets(
fileset_id, fileset, transaction_id, conn
)
@@ -1047,6 +1052,7 @@ def scan_process(
conn,
skiplog,
)
+ fileset_count += 1
# Final log
with conn.cursor() as cursor:
@@ -1063,7 +1069,7 @@ def scan_process(
create_log(escape_string(category_text), user, escape_string(log_text), conn)
-def scan_update_files(rom, filesets_check_for_full, transaction_id, conn):
+def pre_update_files(rom, filesets_check_for_full, transaction_id, conn):
"""
Updates all the checksums for the files matching by a checksum and size.
"""
@@ -1074,6 +1080,9 @@ def scan_update_files(rom, filesets_check_for_full, transaction_id, conn):
checksums[key] = rom[key]
files_to_update = set()
+ size = rom["size"] if "size" in rom else 0
+ size_r = rom["size-r"] if "size-r" in rom else 0
+ size_rd = rom["size-rd"] if "size-rd" in rom else 0
for _, checksum in checksums.items():
query = """
@@ -1088,9 +1097,7 @@ def scan_update_files(rom, filesets_check_for_full, transaction_id, conn):
AND f.`size-rd` = %s
AND t.transaction != %s
"""
- size = rom["size"] if "size" in rom else 0
- size_r = rom["size-r"] if "size-r" in rom else 0
- size_rd = rom["size-rd"] if "size-rd" in rom else 0
+
cursor.execute(query, (checksum, size, size_r, size_rd, transaction_id))
result = cursor.fetchall()
if result:
@@ -1104,12 +1111,20 @@ def scan_update_files(rom, filesets_check_for_full, transaction_id, conn):
WHERE file = %s
"""
cursor.execute(query, (file_id,))
+ # Update checksums
for check, checksum in checksums.items():
checksize, checktype, checksum = get_checksum_props(check, checksum)
query = "INSERT INTO filechecksum (file, checksize, checktype, checksum) VALUES (%s, %s, %s, %s)"
cursor.execute(query, (file_id, checksize, checktype, checksum))
-
- conn.commit()
+ # Update sizes
+ query = """
+ UPDATE file
+ SET size = %s,
+ `size-r` = %s,
+ `size-rd` = %s,
+ WHERE id = %s
+ """
+ cursor.execute(query, size, size_r, size_rd, file_id)
def scan_perform_match(
@@ -1907,31 +1922,7 @@ def set_perform_match(
)
elif len(candidate_filesets) > 1:
- found_match = False
- for candidate_fileset in candidate_filesets:
- (is_match, _) = is_full_checksum_match(candidate_fileset, fileset, conn)
- if is_match:
- update_fileset_status(cursor, candidate_fileset, "partial")
- set_populate_file(fileset, candidate_fileset, conn, detection)
- auto_merged_filesets += 1
- if not skiplog:
- log_matched_fileset(
- src,
- fileset_id,
- candidate_fileset,
- "partial",
- user,
- conn,
- )
- delete_original_fileset(fileset_id, conn)
- remove_manual_merge_if_size_mismatch(
- candidate_fileset, manual_merge_map, set_to_candidate_dict, conn
- )
- found_match = True
- break
-
- if not found_match:
- manual_merge_map[fileset_id] = candidate_filesets
+ manual_merge_map[fileset_id] = candidate_filesets
return (
fully_matched_filesets,
@@ -2160,8 +2151,7 @@ def set_filter_candidate_filesets(
JOIN game g ON g.id = fs.game
JOIN engine e ON e.id = g.engine
JOIN transactions t ON t.fileset = fs.id
- WHERE fs.id != %s
- AND e.engineid = %s
+ WHERE e.engineid = %s
AND f.detection = 1
AND t.transaction != %s
),
@@ -2199,9 +2189,7 @@ def set_filter_candidate_filesets(
JOIN max_match_count mmc ON vmdf.valid_match_files_count = mmc.max_count
"""
- cursor.execute(
- query, (fileset_id, fileset["sourcefile"], transaction_id, fileset_id)
- )
+ cursor.execute(query, (fileset["sourcefile"], transaction_id, fileset_id))
rows = cursor.fetchall()
candidates = []
@@ -2209,9 +2197,76 @@ def set_filter_candidate_filesets(
for row in rows:
candidates.append(row["fileset_id"])
+ matched_candidates = []
+
+ candidates = [
+ candidate
+ for candidate in candidates
+ if is_candidate_by_checksize(candidate, fileset, conn)
+ ]
+
+ for candidate in candidates:
+ if is_full_detection_checksum_match(candidate, fileset, conn):
+ matched_candidates.append(candidate)
+
+ if len(matched_candidates) != 0:
+ candidates = matched_candidates
+
return (candidates, fileset_count)
+def is_candidate_by_checksize(candidate, fileset, conn):
+ with conn.cursor() as cursor:
+ cursor.execute(
+ "SELECT id, REGEXP_REPLACE(name, '^.*[\\\\/]', '') AS name, size FROM file WHERE detection=1 AND fileset = %s",
+ (candidate,),
+ )
+ target_files = cursor.fetchall()
+ candidate_files = {
+ target_file["id"]: [target_file["name"], target_file["size"]]
+ for target_file in target_files
+ }
+
+ # set of (checksum, filename)
+ scan_checksums = set()
+ for file in fileset["rom"]:
+ for key in file:
+ if key.startswith("md5"):
+ name = os.path.basename(normalised_path(file["name"]))
+ scan_checksums.add((file[key], name.lower()))
+
+ for detection_file_id, [
+ detection_file_name,
+ detection_file_size,
+ ] in candidate_files.items():
+ query = """
+ SELECT fc.checksum, fc.checksize, fc.checktype
+ FROM filechecksum fc
+ WHERE fc.file = %s
+ """
+ cursor.execute(query, (detection_file_id,))
+ checksums_info = cursor.fetchall()
+ if checksums_info:
+ for checksum_info in checksums_info:
+ checksum = checksum_info["checksum"]
+ checksize = checksum_info["checksize"]
+ if checksize == "1M":
+ checksize = 1048576
+ if (
+ (
+ checksum,
+ os.path.basename(detection_file_name.lower()),
+ )
+ not in scan_checksums
+ and detection_file_size <= int(checksize)
+ and detection_file_size != -1
+ ):
+ continue
+ else:
+ return True
+ return False
+
+
def process_fileset(
fileset,
resources,
@@ -2972,6 +3027,11 @@ def console_log_candidate_filtering(fileset_count):
sys.stdout.flush()
+def console_log_file_update(fileset_count):
+ sys.stdout.write(f"Updating files - Fileset {fileset_count}\r")
+ sys.stdout.flush()
+
+
def console_log_matching(fileset_count):
sys.stdout.write(f"Performing Match - Fileset {fileset_count}\r")
sys.stdout.flush()
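The checksize handling in is_candidate_by_checksize hinges on whether a prefix checksum actually covers the whole file. A hedged sketch of that test, following the "1M" shorthand visible above (the helper name is illustrative):

def checksum_covers_file(checksize, file_size):
    # "1M" is shorthand for a 1 MiB prefix checksum; other values are byte counts.
    if checksize == "1M":
        checksize = 1048576
    return file_size <= int(checksize)

print(checksum_covers_file("5000", 8576))  # False: only the first 5000 bytes were hashed
print(checksum_covers_file("1M", 8576))    # True: the whole 8576-byte file is covered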
Commit: b8675f477ebd0274b93c7b2494440b8e41bafd4a
https://github.com/scummvm/scummvm-sites/commit/b8675f477ebd0274b93c7b2494440b8e41bafd4a
Author: ShivangNagta (shivangnag at gmail.com)
Date: 2025-07-18T22:15:19+02:00
Commit Message:
INTEGRITY: Remove 'obsolete' fileset status entirely.
Changed paths:
db_functions.py
diff --git a/db_functions.py b/db_functions.py
index 3a4fc12..c4e65a2 100644
--- a/db_functions.py
+++ b/db_functions.py
@@ -146,9 +146,6 @@ def insert_fileset(
cursor.execute(
f"UPDATE fileset SET `timestamp` = FROM_UNIXTIME(@fileset_time_last) WHERE id = {existing_entry}"
)
- cursor.execute(
- f"UPDATE fileset SET status = 'detection' WHERE id = {existing_entry} AND status = 'obsolete'"
- )
cursor.execute(f"SELECT status FROM fileset WHERE id = {existing_entry}")
status = cursor.fetchone()["status"]
if status == "user":
@@ -610,10 +607,6 @@ def db_insert(data_arr, username=None, skiplog=False):
fileset_count += 1
- if detection:
- conn.cursor().execute(
- "UPDATE fileset SET status = 'obsolete' WHERE `timestamp` != FROM_UNIXTIME(@fileset_time_last) AND status = 'detection'"
- )
cur = conn.cursor()
try:
Commit: 5891b9295d6e74f8bb017c1af7cdd276929532e4
https://github.com/scummvm/scummvm-sites/commit/5891b9295d6e74f8bb017c1af7cdd276929532e4
Author: ShivangNagta (shivangnag at gmail.com)
Date: 2025-07-18T22:15:19+02:00
Commit Message:
INTEGRITY: Add checksum based filtering before filtering by maximum number of files matched.
Changed paths:
db_functions.py
diff --git a/db_functions.py b/db_functions.py
index c4e65a2..20a3240 100644
--- a/db_functions.py
+++ b/db_functions.py
@@ -2130,15 +2130,17 @@ def set_filter_candidate_filesets(
fileset_id, fileset, fileset_count, transaction_id, conn
):
"""
- Returns a list of candidate filesets that can be merged
+ Returns a list of candidate filesets that can be merged.
+ Performs early filtering in SQL (by engine, name, size) and then
+ applies checksum filtering and max-match filtering in Python.
"""
with conn.cursor() as cursor:
- # Returns those filesets which have all detection files matching in the set fileset filtered by engine, file name and file size(if not -1) sorted in descending order of matches
fileset_count += 1
console_log_candidate_filtering(fileset_count)
+
+ # Early filter candidates using enginename, filename and size
query = """
- WITH candidate_fileset AS (
- SELECT fs.id AS fileset_id, f.name, f.size
+ SELECT fs.id AS fileset_id, f.id AS file_id, f.name, f.size
FROM file f
JOIN fileset fs ON f.fileset = fs.id
JOIN game g ON g.id = fs.game
@@ -2147,65 +2149,94 @@ def set_filter_candidate_filesets(
WHERE e.engineid = %s
AND f.detection = 1
AND t.transaction != %s
- ),
- total_detection_files AS (
- SELECT cf.fileset_id, COUNT(*) AS detection_files_found
- FROM candidate_fileset cf
- GROUP BY fileset_id
- ),
- set_fileset AS (
- SELECT name, size FROM file
- WHERE fileset = %s
- ),
- matched_detection_files AS (
- SELECT cf.fileset_id, COUNT(*) AS match_files_count
- FROM candidate_fileset cf
- JOIN set_fileset sf ON ( (
- cf.name = sf.name
- OR
- REGEXP_REPLACE(cf.name, '^.*[\\\\/]', '') = REGEXP_REPLACE(sf.name, '^.*[\\\\/]', '')
- ) AND (cf.size = sf.size OR cf.size = -1) )
- GROUP BY cf.fileset_id
- ),
- valid_matched_detection_files AS (
- SELECT mdf.fileset_id, mdf.match_files_count AS valid_match_files_count
- FROM matched_detection_files mdf
- JOIN total_detection_files tdf ON tdf.fileset_id = mdf.fileset_id
- WHERE tdf.detection_files_found <= mdf.match_files_count
- ),
- max_match_count AS (
- SELECT MAX(valid_match_files_count) AS max_count FROM valid_matched_detection_files
- )
- SELECT vmdf.fileset_id
- FROM valid_matched_detection_files vmdf
- JOIN total_detection_files tdf ON vmdf.fileset_id = tdf.fileset_id
- JOIN max_match_count mmc ON vmdf.valid_match_files_count = mmc.max_count
"""
-
- cursor.execute(query, (fileset["sourcefile"], transaction_id, fileset_id))
- rows = cursor.fetchall()
-
- candidates = []
- if rows:
- for row in rows:
- candidates.append(row["fileset_id"])
-
- matched_candidates = []
-
- candidates = [
- candidate
- for candidate in candidates
- if is_candidate_by_checksize(candidate, fileset, conn)
- ]
-
- for candidate in candidates:
- if is_full_detection_checksum_match(candidate, fileset, conn):
- matched_candidates.append(candidate)
-
- if len(matched_candidates) != 0:
- candidates = matched_candidates
-
- return (candidates, fileset_count)
+ cursor.execute(query, (fileset["sourcefile"], transaction_id))
+ raw_candidates = cursor.fetchall()
+
+ # fileset id to detection files map
+ candidate_map = defaultdict(list)
+ total_detection_files_map = defaultdict(int)
+ for row in raw_candidates:
+ candidate_map[row["fileset_id"]].append(
+ {
+ "file_id": row["file_id"],
+ "name": row["name"],
+ "size": row["size"],
+ }
+ )
+ for id, files in candidate_map.items():
+ total_detection_files_map[id] = len(files)
+
+ set_checksums = set()
+ set_file_name_size = set()
+ for file in fileset["rom"]:
+ for key in file:
+ if key.startswith("md5"):
+ name = os.path.basename(normalised_path(file["name"]))
+ set_checksums.add((file[key], name.lower(), int(file["size"])))
+ set_checksums.add((file[key], name.lower(), -1))
+ set_file_name_size.add((name.lower(), -1))
+ set_file_name_size.add((name.lower(), int(file["size"])))
+
+ # Filter candidates by detection filename and file size (including -1) and increase matched file count
+ # if filesize = -1,
+ # elif filesize <= checksize and checksum matches,
+ # elif filesize > checksize.
+ match_counts = {}
+ for fileset_id, files in candidate_map.items():
+ count = 0
+ with conn.cursor() as cursor:
+ for f in files:
+ filename = os.path.basename(f["name"]).lower()
+ filesize = f["size"]
+ if (filename, filesize) in set_file_name_size:
+ if filesize == -1:
+ count += 1
+ else:
+ cursor.execute(
+ """
+ SELECT checksum, checksize, checktype
+ FROM filechecksum
+ WHERE file = %s
+ """,
+ (f["file_id"],),
+ )
+ checksums = cursor.fetchall()
+ not_inc_count = False
+ for c in checksums:
+ checksum = c["checksum"]
+ checksize = c["checksize"]
+ if checksize == "1M":
+ checksize = 1048576
+ elif checksize == "0":
+ checksize = filesize
+ if filesize <= int(checksize):
+ if (checksum, filename, filesize) in set_checksums:
+ count += 1
+ not_inc_count = True
+ # if it was a true match, checksum should be present
+ break
+ if not not_inc_count:
+ count += 1
+ if count > 0 and total_detection_files_map[fileset_id] <= count:
+ match_counts[fileset_id] = count
+
+ # Filter only entries with maximum number of matched files
+ if not match_counts:
+ return ([], fileset_count)
+
+ max_match = max(match_counts.values())
+ candidates = [fid for fid, count in match_counts.items() if count == max_match]
+
+ matched_candidates = []
+ for candidate in candidates:
+ if is_full_detection_checksum_match(candidate, fileset, conn):
+ matched_candidates.append(candidate)
+
+ if len(matched_candidates) != 0:
+ candidates = matched_candidates
+
+ return (candidates, fileset_count)
def is_candidate_by_checksize(candidate, fileset, conn):
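After the per-candidate counts are built, a candidate survives only if it matched at least as many files as it has detection files and also reached the overall maximum count. A compact, dict-based sketch of that final filtering step (standalone, not the repository code):

def filter_by_max_match(match_counts, total_detection_files):
    """match_counts: dict fileset_id -> matched file count
    total_detection_files: dict fileset_id -> detection files in that fileset"""
    valid = {fid: c for fid, c in match_counts.items()
             if c > 0 and total_detection_files[fid] <= c}
    if not valid:
        return []
    best = max(valid.values())
    return [fid for fid, c in valid.items() if c == best]

print(filter_by_max_match({1: 3, 2: 5, 3: 5}, {1: 3, 2: 5, 3: 6}))  # [2]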
Commit: 62af22a0b562cf93a8be3666acc79d76d64c0897
https://github.com/scummvm/scummvm-sites/commit/62af22a0b562cf93a8be3666acc79d76d64c0897
Author: ShivangNagta (shivangnag at gmail.com)
Date: 2025-07-18T22:15:19+02:00
Commit Message:
INTEGRITY: Merge one of the entries from dropped duplicate entries. Drop others.
Changed paths:
db_functions.py
diff --git a/db_functions.py b/db_functions.py
index 20a3240..f9e9fe0 100644
--- a/db_functions.py
+++ b/db_functions.py
@@ -1693,7 +1693,7 @@ def set_process(
console_message = "Looking for duplicates..."
console_log(console_message)
- # Remove all such filesets, which have many to one mapping with a single candidate, those are extra variants.
+ # Remove all such filesets, which have a many-to-one mapping with a single candidate; just merge one of them and drop the rest.
value_to_keys = defaultdict(list)
for set_fileset, candidates in set_to_candidate_dict.items():
if len(candidates) == 1:
@@ -1717,7 +1717,12 @@ def set_process(
platform = result["platform"]
language = result["language"]
+ # Skip the first entry, let it merge and drop others
+ skip = True
for set_fileset in set_filesets:
+ if skip:
+ skip = False
+ continue
fileset = id_to_fileset_dict[set_fileset]
category_text = "Drop fileset - Duplicates"
fileset_name = fileset["name"] if "name" in fileset else ""
@@ -1742,9 +1747,9 @@ def set_process(
fileset = id_to_fileset_dict[fileset_id]
# Filter by platform to reduce manual merge
- candidate_filesets = set_filter_by_platform(
- fileset["name"], candidate_filesets, conn
- )
+ # candidate_filesets = set_filter_by_platform(
+ # fileset["name"], candidate_filesets, conn
+ # )
(
fully_matched_filesets,
@@ -1771,16 +1776,35 @@ def set_process(
match_count += 1
console_log("Matching performed.")
- for fileset_id, candidates in manual_merge_map.items():
- category_text = "Manual Merge Required"
- log_text = f"Merge Fileset:{fileset_id} manually. Possible matches are: {', '.join(f'Fileset:{id}' for id in candidates)}."
- manual_merged_filesets += 1
- add_manual_merge(
- candidates, fileset_id, category_text, log_text, user, conn, log_text
- )
-
- # Final log
with conn.cursor() as cursor:
+ for fileset_id, candidates in manual_merge_map.items():
+ if len(candidates) == 0:
+ category_text = "Drop fileset - No Candidates"
+ fileset = id_to_fileset_dict[fileset_id]
+ fileset_name = fileset["name"] if "name" in fileset else ""
+ fileset_description = (
+ fileset["description"] if "description" in fileset else ""
+ )
+ log_text = f"Drop fileset as no matching candidates. Name: {fileset_name}, Description: {fileset_description}."
+ create_log(
+ escape_string(category_text), user, escape_string(log_text), conn
+ )
+ dropped_early_no_candidate += 1
+ delete_original_fileset(fileset_id, conn)
+ else:
+ category_text = "Manual Merge Required"
+ log_text = f"Merge Fileset:{fileset_id} manually. Possible matches are: {', '.join(f'Fileset:{id}' for id in candidates)}."
+ manual_merged_filesets += 1
+ add_manual_merge(
+ candidates,
+ fileset_id,
+ category_text,
+ log_text,
+ user,
+ conn,
+ log_text,
+ )
+
cursor.execute(
"SELECT COUNT(fileset) from transactions WHERE `transaction` = %s",
(transaction_id,),
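The duplicate handling above groups set.dat filesets by their single shared candidate, merges the first one of each group and drops the rest. A minimal sketch of that grouping (names are illustrative):

from collections import defaultdict

def split_duplicates(set_to_candidate):
    # Group filesets that all map to exactly one candidate.
    value_to_keys = defaultdict(list)
    for set_id, candidates in set_to_candidate.items():
        if len(candidates) == 1:
            value_to_keys[candidates[0]].append(set_id)
    keep, drop = [], []
    for candidate, set_ids in value_to_keys.items():
        if len(set_ids) > 1:
            keep.append(set_ids[0])   # let the first one merge
            drop.extend(set_ids[1:])  # drop the remaining duplicates
    return keep, drop

print(split_duplicates({10: [100], 11: [100], 12: [200]}))  # ([10], [11])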
Commit: d2aaa54f71382f1d9d347252bdea74be7f3e7c34
https://github.com/scummvm/scummvm-sites/commit/d2aaa54f71382f1d9d347252bdea74be7f3e7c34
Author: ShivangNagta (shivangnag at gmail.com)
Date: 2025-07-18T22:15:19+02:00
Commit Message:
INTEGRITY: Merge filtering logic for glk with existing set.dat filtering.
Changed paths:
db_functions.py
diff --git a/db_functions.py b/db_functions.py
index f9e9fe0..897d346 100644
--- a/db_functions.py
+++ b/db_functions.py
@@ -1659,14 +1659,9 @@ def set_process(
# Separating out the matching logic for glk engine
engine_name = fileset["sourcefile"].split("-")[0]
- if engine_name == "glk":
- (candidate_filesets, fileset_count) = set_glk_filter_candidate_filesets(
- fileset_id, fileset, fileset_count, transaction_id, engine_name, conn
- )
- else:
- (candidate_filesets, fileset_count) = set_filter_candidate_filesets(
- fileset_id, fileset, fileset_count, transaction_id, conn
- )
+ (candidate_filesets, fileset_count) = set_filter_candidate_filesets(
+ fileset_id, fileset, fileset_count, transaction_id, engine_name, conn
+ )
# Mac files in set.dat are not represented properly and they won't find a candidate fileset for a match, so we can drop them.
if len(candidate_filesets) == 0:
@@ -2071,93 +2066,16 @@ def is_full_checksum_match(candidate_fileset, fileset, conn):
return (len(unmatched_files) == 0, unmatched_files)
-def set_glk_filter_candidate_filesets(
- fileset_id, fileset, fileset_count, transaction_id, engine_name, conn
-):
- """
- Returns a list of candidate filesets for glk engines that can be merged
- """
- with conn.cursor() as cursor:
- # Returns those filesets which have all detection files matching in the set fileset filtered by engine, file name and file size(if not -1) sorted in descending order of matches
- fileset_count += 1
- console_log_candidate_filtering(fileset_count)
- query = """
- WITH candidate_fileset AS (
- SELECT fs.id AS fileset_id, f.size
- FROM file f
- JOIN fileset fs ON f.fileset = fs.id
- JOIN game g ON g.id = fs.game
- JOIN engine e ON e.id = g.engine
- JOIN transactions t ON t.fileset = fs.id
- WHERE fs.id != %s
- AND e.engineid = %s
- AND f.detection = 1
- AND t.transaction != %s
- AND (g.gameid = %s OR (g.gameid != %s AND g.gameid LIKE %s))
- ),
- total_detection_files AS (
- SELECT cf.fileset_id, COUNT(*) AS detection_files_found
- FROM candidate_fileset cf
- GROUP BY fileset_id
- ),
- set_fileset AS (
- SELECT size FROM file
- WHERE fileset = %s
- ),
- matched_detection_files AS (
- SELECT cf.fileset_id, COUNT(*) AS match_files_count
- FROM candidate_fileset cf
- JOIN set_fileset sf ON
- cf.size = sf.size OR cf.size = 0
- GROUP BY cf.fileset_id
- ),
- valid_matched_detection_files AS (
- SELECT mdf.fileset_id, mdf.match_files_count AS valid_match_files_count
- FROM matched_detection_files mdf
- JOIN total_detection_files tdf ON tdf.fileset_id = mdf.fileset_id
- WHERE tdf.detection_files_found <= mdf.match_files_count
- ),
- max_match_count AS (
- SELECT MAX(valid_match_files_count) AS max_count FROM valid_matched_detection_files
- )
- SELECT vmdf.fileset_id
- FROM valid_matched_detection_files vmdf
- JOIN total_detection_files tdf ON vmdf.fileset_id = tdf.fileset_id
- JOIN max_match_count mmc ON vmdf.valid_match_files_count = mmc.max_count
- """
-
- gameid_pattern = f"%{fileset['name']}%"
-
- cursor.execute(
- query,
- (
- fileset_id,
- engine_name,
- transaction_id,
- fileset["name"],
- fileset["name"],
- gameid_pattern,
- fileset_id,
- ),
- )
- rows = cursor.fetchall()
-
- candidates = []
- if rows:
- for row in rows:
- candidates.append(row["fileset_id"])
-
- return (candidates, fileset_count)
-
-
def set_filter_candidate_filesets(
- fileset_id, fileset, fileset_count, transaction_id, conn
+ fileset_id, fileset, fileset_count, transaction_id, engine_name, conn
):
"""
Returns a list of candidate filesets that can be merged.
Performs early filtering in SQL (by engine, name, size) and then
applies checksum filtering and max-match filtering in Python.
+ In the case of glk engines, filtering is not done by name; the gameid is used instead.
"""
+ is_glk = engine_name == "glk"
with conn.cursor() as cursor:
fileset_count += 1
console_log_candidate_filtering(fileset_count)
@@ -2174,7 +2092,21 @@ def set_filter_candidate_filesets(
AND f.detection = 1
AND t.transaction != %s
"""
- cursor.execute(query, (fileset["sourcefile"], transaction_id))
+ if is_glk:
+ query += " AND (g.gameid = %s OR (g.gameid != %s AND g.gameid LIKE %s))"
+ gameid_pattern = f"%{fileset['name']}%"
+ cursor.execute(
+ query,
+ (
+ engine_name,
+ transaction_id,
+ fileset["name"],
+ fileset["name"],
+ gameid_pattern,
+ ),
+ )
+ else:
+ cursor.execute(query, (fileset["sourcefile"], transaction_id))
raw_candidates = cursor.fetchall()
# fileset id to detection files map
@@ -2184,7 +2116,7 @@ def set_filter_candidate_filesets(
candidate_map[row["fileset_id"]].append(
{
"file_id": row["file_id"],
- "name": row["name"],
+ "name": os.path.basename(normalised_path(row["name"])).lower(),
"size": row["size"],
}
)
@@ -2193,14 +2125,17 @@ def set_filter_candidate_filesets(
set_checksums = set()
set_file_name_size = set()
+ set_glk_file_size = set()
for file in fileset["rom"]:
+ name = os.path.basename(normalised_path(file["name"]))
for key in file:
if key.startswith("md5"):
- name = os.path.basename(normalised_path(file["name"]))
set_checksums.add((file[key], name.lower(), int(file["size"])))
set_checksums.add((file[key], name.lower(), -1))
set_file_name_size.add((name.lower(), -1))
set_file_name_size.add((name.lower(), int(file["size"])))
+ if is_glk:
+ set_glk_file_size.add(int(file["size"]))
# Filter candidates by detection filename and file size (including -1) and increase matched file count
# if filesize = -1,
@@ -2213,6 +2148,8 @@ def set_filter_candidate_filesets(
for f in files:
filename = os.path.basename(f["name"]).lower()
filesize = f["size"]
+ if is_glk and (filesize in set_glk_file_size or filesize == 0):
+ count += 1
if (filename, filesize) in set_file_name_size:
if filesize == -1:
count += 1
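For glk engines the filenames in set.dat are not reliable, so a detection file counts as matched when its size appears among the sizes collected from the dat entry, with size 0 acting as a wildcard. A small standalone sketch of that check:

def glk_size_match_count(detection_sizes, dat_sizes):
    # A detection file matches if its size is present on the dat side, or if it is 0.
    dat_size_set = set(dat_sizes)
    return sum(1 for s in detection_sizes if s in dat_size_set or s == 0)

print(glk_size_match_count([0, 8576, 1200], [8576, 4096]))  # 2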
Commit: fc88959d72728bee94a596c6fe1f48987db8ffca
https://github.com/scummvm/scummvm-sites/commit/fc88959d72728bee94a596c6fe1f48987db8ffca
Author: ShivangNagta (shivangnag at gmail.com)
Date: 2025-07-18T22:15:19+02:00
Commit Message:
INTEGRITY: Add checksum filtering before max files filtering in scan.dat processing
Changed paths:
db_functions.py
diff --git a/db_functions.py b/db_functions.py
index 897d346..2ae4778 100644
--- a/db_functions.py
+++ b/db_functions.py
@@ -1059,6 +1059,7 @@ def scan_process(
create_log(escape_string(category_text), user, escape_string(log_text), conn)
category_text = "Upload information"
log_text = f"Number of filesets: {fileset_insertion_count}. Filesets automatically merged: {automatic_merged_filesets}. Filesets requiring manual merge (multiple candidates): {manual_merged_filesets}. Filesets requiring manual merge (matched with detection): {manual_merged_with_detection}. Filesets dropped, no candidate: {dropped_early_no_candidate}. Filesets matched with existing Full fileset: {match_with_full_fileset}. Filesets with mismatched files with Full fileset: {mismatch_with_full_fileset}. Filesets missing files compared to partial fileset candidate: {filesets_with_missing_files}."
+ console_log(log_text)
create_log(escape_string(category_text), user, escape_string(log_text), conn)
@@ -1115,9 +1116,12 @@ def pre_update_files(rom, filesets_check_for_full, transaction_id, conn):
SET size = %s,
`size-r` = %s,
`size-rd` = %s,
+ name = %s
WHERE id = %s
"""
- cursor.execute(query, size, size_r, size_rd, file_id)
+ cursor.execute(
+ query, (size, size_r, size_rd, normalised_path(rom["name"]), file_id)
+ )
def scan_perform_match(
@@ -1396,73 +1400,146 @@ def total_fileset_files(fileset):
def scan_filter_candidate_filesets(fileset_id, fileset, transaction_id, conn):
"""
- Returns a list of candidate filesets that can be merged
+ Returns a list of candidate filesets that can be merged.
+ Performs early filtering in SQL (by name, size) and then
+ applies checksum filtering and max-match filtering in Python.
"""
with conn.cursor() as cursor:
- # Returns those filesets which have all detection files matching in the scan fileset filtered by file name and file size(if not -1).
-
+ # Fetching detection filename and all sizes (size, size-r, size-rd) from database
query = """
- WITH candidate_fileset AS (
- SELECT fs.id AS fileset_id, f.name, f.size,
+ SELECT fs.id AS fileset_id, f.id as file_id, f.name, f.size,
f.`size-r` AS size_r, f.`size-rd` AS size_rd
FROM file f
JOIN fileset fs ON f.fileset = fs.id
JOIN game g ON g.id = fs.game
+ JOIN engine e ON e.id = g.engine
JOIN transactions t ON t.fileset = fs.id
WHERE f.detection = 1
AND t.transaction != %s
- ),
- total_detection_files AS (
- SELECT cf.fileset_id, COUNT(*) AS detection_files_found
- FROM candidate_fileset cf
- GROUP BY fileset_id
- ),
- set_fileset AS (
- SELECT name, size,
- `size-r` AS size_r, `size-rd` AS size_rd
- FROM file
- WHERE fileset = %s
- ),
- matched_detection_files AS (
- SELECT cf.fileset_id, COUNT(*) AS match_files_count
- FROM candidate_fileset cf
- JOIN set_fileset sf ON ( (
- cf.name = sf.name
- OR
- REGEXP_REPLACE(cf.name, '^.*[\\\\/]', '') = REGEXP_REPLACE(sf.name, '^.*[\\\\/]', '')
- ) AND (cf.size = sf.size OR cf.size = -1)
- AND (cf.size_r = sf.size_r)
- AND (cf.size_rd = sf.size_rd))
- GROUP BY cf.fileset_id
- ),
- valid_matched_detection_files AS (
- SELECT mdf.fileset_id, mdf.match_files_count AS valid_match_files_count
- FROM matched_detection_files mdf
- JOIN total_detection_files tdf ON tdf.fileset_id = mdf.fileset_id
- WHERE tdf.detection_files_found <= mdf.match_files_count
- ),
- max_match_count AS (
- SELECT MAX(valid_match_files_count) AS max_count FROM valid_matched_detection_files
- )
- SELECT vmdf.fileset_id
- FROM valid_matched_detection_files vmdf
- JOIN total_detection_files tdf ON vmdf.fileset_id = tdf.fileset_id
- JOIN max_match_count mmc ON vmdf.valid_match_files_count = mmc.max_count
"""
+ cursor.execute(query, (transaction_id,))
+ raw_candidates = cursor.fetchall()
+
+ # fileset id to detection files map
+ candidate_map = defaultdict(list)
+ total_detection_files_map = defaultdict(int)
+ for row in raw_candidates:
+ candidate_map[row["fileset_id"]].append(
+ {
+ "file_id": row["file_id"],
+ "name": os.path.basename(normalised_path(row["name"])).lower(),
+ "size": row["size"],
+ "size-r": row["size_r"],
+ "size-rd": row["size_rd"],
+ }
+ )
+ for id, files in candidate_map.items():
+ total_detection_files_map[id] = len(files)
+
+ set_checksums = set()
+ set_file_name_size = set()
+ for file in fileset["rom"]:
+ name = os.path.basename(normalised_path(file["name"]))
+ for key in file:
+ if key.startswith("md5"):
+ set_checksums.add(
+ (
+ file[key],
+ name.lower(),
+ int(file["size"]),
+ int(file["size-r"]),
+ int(file["size-rd"]),
+ )
+ )
+ set_checksums.add(
+ (
+ file[key],
+ name.lower(),
+ -1,
+ int(file["size-r"]),
+ int(file["size-rd"]),
+ )
+ )
+ set_file_name_size.add(
+ (name.lower(), -1, int(file["size-r"]), int(file["size-rd"]))
+ )
+ set_file_name_size.add(
+ (name.lower(), int(file["size"]), int(file["size-r"]), int(file["size-rd"]))
+ )
+
+ # Filter candidates by detection filename and file size (including -1) and increase matched file count
+ # if filesize = -1,
+ # elif filesize <= checksize and checksum matches,
+ # elif filesize > checksize.
+ match_counts = {}
+ for fileset_id, files in candidate_map.items():
+ count = 0
+ with conn.cursor() as cursor:
+ for f in files:
+ filename = os.path.basename(f["name"]).lower()
+ size = f["size"]
+ size_r = f["size-r"]
+ size_rd = f["size-rd"]
+ if (filename, size, size_r, size_rd) in set_file_name_size:
+ if size == -1:
+ count += 1
+ else:
+ cursor.execute(
+ """
+ SELECT checksum, checksize, checktype
+ FROM filechecksum
+ WHERE file = %s
+ """,
+ (f["file_id"],),
+ )
+ checksums = cursor.fetchall()
+ not_inc_count = False
+ for c in checksums:
+ filesize = size
+ checksum = c["checksum"]
+ checksize = c["checksize"]
+ checktype = c["checktype"]
+ # Macfiles handling
+ if checktype in ["md5-r", "md5-rt"]:
+ filesize = size_rd
- cursor.execute(query, (transaction_id, fileset_id))
- rows = cursor.fetchall()
+ if checksize == "1M":
+ checksize = 1048576
+ elif checksize == "0":
+ checksize = filesize
+ if filesize <= int(checksize):
+ if (
+ checksum,
+ filename,
+ size,
+ size_r,
+ size_rd,
+ ) in set_checksums:
+ count += 1
+ not_inc_count = True
+ # if it was a true match, checksum should be present
+ break
+ if not not_inc_count:
+ count += 1
+ if count > 0 and total_detection_files_map[fileset_id] <= count:
+ match_counts[fileset_id] = count
+
+ # Filter only entries with maximum number of matched files
+ if not match_counts:
+ return []
- candidates = []
- if rows:
- for row in rows:
- candidates.append(row["fileset_id"])
+ max_match = max(match_counts.values())
+ candidates = [fid for fid, count in match_counts.items() if count == max_match]
- for candidate in candidates:
- if not is_full_detection_checksum_match(candidate, fileset, conn):
- candidates.remove(candidate)
+ matched_candidates = []
+ for candidate in candidates:
+ if is_full_detection_checksum_match(candidate, fileset, conn):
+ matched_candidates.append(candidate)
+
+ if len(matched_candidates) != 0:
+ candidates = matched_candidates
- return candidates
+ return candidates
def get_unmatched_files(candidate_fileset, fileset, conn):
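For mac files the checksum may be computed over the resource fork, so the coverage test above switches to size-rd when the checksum type is md5-r or md5-rt. A hedged sketch of that size selection (the function name is illustrative):

def effective_size(checktype, size, size_rd):
    # md5-r / md5-rt checksums are taken over resource-fork data,
    # so the coverage test must compare against size-rd, not the data size.
    return size_rd if checktype in ("md5-r", "md5-rt") else size

print(effective_size("md5-5000", 8576, 0))  # 8576
print(effective_size("md5-rt", 8576, 512))  # 512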
Commit: 4fe8d4f14119c00f7708b80080abc0e5dccc9bfc
https://github.com/scummvm/scummvm-sites/commit/4fe8d4f14119c00f7708b80080abc0e5dccc9bfc
Author: ShivangNagta (shivangnag at gmail.com)
Date: 2025-07-18T22:15:19+02:00
Commit Message:
INTEGRITY: Parameterising all sql queries in db_functions.py
Changed paths:
db_functions.py
diff --git a/db_functions.py b/db_functions.py
index 2ae4778..a41c409 100644
--- a/db_functions.py
+++ b/db_functions.py
@@ -79,24 +79,26 @@ def insert_game(engine_name, engineid, title, gameid, extra, platform, lang, con
# Set @engine_last if engine already present in table
exists = False
with conn.cursor() as cursor:
- cursor.execute(f"SELECT id FROM engine WHERE engineid = '{engineid}'")
+ cursor.execute("SELECT id FROM engine WHERE engineid = %s", (engineid,))
res = cursor.fetchone()
if res is not None:
exists = True
- cursor.execute(f"SET @engine_last = '{res['id']}'")
+ cursor.execute("SET @engine_last = %s", (res["id"],))
# Insert into table if not present
if not exists:
with conn.cursor() as cursor:
cursor.execute(
- f"INSERT INTO engine (name, engineid) VALUES ('{escape_string(engine_name)}', '{engineid}')"
+ "INSERT INTO engine (name, engineid) VALUES (%s, %s)",
+ (engine_name, engineid),
)
cursor.execute("SET @engine_last = LAST_INSERT_ID()")
# Insert into game
with conn.cursor() as cursor:
cursor.execute(
- f"INSERT INTO game (name, engine, gameid, extra, platform, language) VALUES ('{escape_string(title)}', @engine_last, '{gameid}', '{escape_string(extra)}', '{platform}', '{lang}')"
+ "INSERT INTO game (name, engine, gameid, extra, platform, language) VALUES (%s, @engine_last, %s, %s, %s, %s)",
+ (title, gameid, extra, platform, lang),
)
cursor.execute("SET @game_last = LAST_INSERT_ID()")
@@ -116,8 +118,8 @@ def insert_fileset(
):
status = "detection" if detection else src
game = "NULL"
- key = "NULL" if key == "" else f"'{key}'"
- megakey = "NULL" if megakey == "" else f"'{megakey}'"
+ key = "NULL" if key == "" else key
+ megakey = "NULL" if megakey == "" else megakey
if detection:
status = "detection"
@@ -129,24 +131,27 @@ def insert_fileset(
# Check if key/megakey already exists, if so, skip insertion (no quotes on purpose)
if detection:
with conn.cursor() as cursor:
- cursor.execute(f"SELECT id FROM fileset WHERE megakey = {megakey}")
+ cursor.execute("SELECT id FROM fileset WHERE megakey = %s", (megakey,))
existing_entry = cursor.fetchone()
else:
with conn.cursor() as cursor:
- cursor.execute(f"SELECT id FROM fileset WHERE `key` = {key}")
+ cursor.execute("SELECT id FROM fileset WHERE `key` = %s", (key,))
existing_entry = cursor.fetchone()
if existing_entry is not None:
existing_entry = existing_entry["id"]
with conn.cursor() as cursor:
- cursor.execute(f"SET @fileset_last = {existing_entry}")
- cursor.execute(f"DELETE FROM file WHERE fileset = {existing_entry}")
+ cursor.execute("SET @fileset_last = %s", (existing_entry,))
+ cursor.execute("DELETE FROM file WHERE fileset = %s", (existing_entry,))
cursor.execute(
- f"UPDATE fileset SET `timestamp` = FROM_UNIXTIME(@fileset_time_last) WHERE id = {existing_entry}"
+ "UPDATE fileset SET `timestamp` = FROM_UNIXTIME(@fileset_time_last) WHERE id = %s",
+ (existing_entry,),
+ )
+ cursor.execute(
+ "SELECT status FROM fileset WHERE id = %s", (existing_entry,)
)
- cursor.execute(f"SELECT status FROM fileset WHERE id = {existing_entry}")
status = cursor.fetchone()["status"]
if status == "user":
add_usercount(existing_entry, conn)
@@ -162,10 +167,10 @@ def insert_fileset(
return (existing_entry, True)
# $game and $key should not be parsed as a mysql string, hence no quotes
- query = f"INSERT INTO fileset (game, status, src, `key`, megakey, `timestamp`, set_dat_metadata) VALUES ({game}, '{status}', '{src}', {key}, {megakey}, FROM_UNIXTIME(@fileset_time_last), '{escape_string(set_dat_metadata)}')"
+ query = f"INSERT INTO fileset (game, status, src, `key`, megakey, `timestamp`, set_dat_metadata) VALUES ({game}, %s, %s, %s, %s, FROM_UNIXTIME(@fileset_time_last), %s)"
fileset_id = -1
with conn.cursor() as cursor:
- cursor.execute(query)
+ cursor.execute(query, (status, src, key, megakey, set_dat_metadata))
fileset_id = cursor.lastrowid
cursor.execute("SET @fileset_last = LAST_INSERT_ID()")
@@ -188,7 +193,8 @@ def insert_fileset(
update_history(0, fileset_last, conn)
with conn.cursor() as cursor:
cursor.execute(
- f"INSERT INTO transactions (`transaction`, fileset) VALUES ({transaction}, {fileset_last})"
+ "INSERT INTO transactions (`transaction`, fileset) VALUES (%s, %s)",
+ (transaction, fileset_last),
)
return (fileset_id, False)
@@ -230,17 +236,11 @@ def insert_file(file, detection, src, conn):
values.append(file["size"] if "size" in file else "0")
values.append(file["size-r"] if "size-r" in file else "0")
values.append(file["size-rd"] if "size-rd" in file else "0")
-
- modification_time = file["modification-time"] if "modification-time" in file else ""
- values.append(modification_time)
-
+ values.append(file["modification-time"] if "modification-time" in file else "")
values.extend([checksum, detection, detection_type])
# Parameterised Query
- placeholders = (
- ["%s"] * (len(values[:6])) + ["@fileset_last"] + ["%s"] * 2 + ["NOW()"]
- )
- query = f"INSERT INTO file ( name, size, `size-r`, `size-rd`, `modification-time`, checksum, fileset, detection, detection_type, `timestamp` ) VALUES ({', '.join(placeholders)})"
+ query = "INSERT INTO file ( name, size, `size-r`, `size-rd`, `modification-time`, checksum, fileset, detection, detection_type, `timestamp` ) VALUES (%s, %s, %s, %s, %s, %s, @fileset_last, %s, %s, NOW())"
with conn.cursor() as cursor:
cursor.execute(query, values)
@@ -248,7 +248,8 @@ def insert_file(file, detection, src, conn):
if detection:
with conn.cursor() as cursor:
cursor.execute(
- f"UPDATE fileset SET detection_size = {checksize} WHERE id = @fileset_last AND detection_size IS NULL"
+ "UPDATE fileset SET detection_size = %s WHERE id = @fileset_last AND detection_size IS NULL",
+ (checksize,),
)
with conn.cursor() as cursor:
cursor.execute("SET @file_last = LAST_INSERT_ID()")
@@ -279,7 +280,7 @@ def add_all_equal_checksums(checksize, checktype, checksum, file_id, conn):
if checktype[-1] == "r":
size_name += "-rd"
- cursor.execute(f"SELECT `{size_name}` FROM file WHERE id = {file_id}")
+ cursor.execute(f"SELECT `{size_name}` FROM file WHERE id = %s", (file_id,))
result = cursor.fetchone()
if not result:
return
@@ -375,9 +376,10 @@ def punycode_need_encode(orig):
def create_log(category, user, text, conn):
query = f"INSERT INTO log (`timestamp`, category, user, `text`) VALUES (FROM_UNIXTIME({int(time.time())}), '{escape_string(category)}', '{escape_string(user)}', '{escape_string(text)}')"
+ query = "INSERT INTO log (`timestamp`, category, user, `text`) VALUES (FROM_UNIXTIME(%s), %s, %s, %s)"
with conn.cursor() as cursor:
try:
- cursor.execute(query)
+ cursor.execute(query, (int(time.time()), category, user, text))
conn.commit()
except Exception as e:
conn.rollback()
@@ -390,10 +392,12 @@ def create_log(category, user, text, conn):
def update_history(source_id, target_id, conn, log_last=None):
- query = f"INSERT INTO history (`timestamp`, fileset, oldfileset, log) VALUES (NOW(), {target_id}, {source_id}, {log_last if log_last is not None else 0})"
+ query = "INSERT INTO history (`timestamp`, fileset, oldfileset, log) VALUES (NOW(), %s, %s, %s)"
with conn.cursor() as cursor:
try:
- cursor.execute(query)
+ cursor.execute(
+ query, (target_id, source_id, log_last if log_last is not None else 0)
+ )
conn.commit()
except Exception as e:
conn.rollback()
@@ -418,7 +422,8 @@ def get_all_related_filesets(fileset_id, conn, visited=None):
try:
with conn.cursor() as cursor:
cursor.execute(
- f"SELECT fileset, oldfileset FROM history WHERE fileset = {fileset_id} OR oldfileset = {fileset_id}"
+ "SELECT fileset, oldfileset FROM history WHERE fileset = %s OR oldfileset = %s",
+ (fileset_id, fileset_id),
)
history_records = cursor.fetchall()
@@ -516,7 +521,7 @@ def db_insert(data_arr, username=None, skiplog=False):
detection = src == "scummvm"
status = "detection" if detection else src
- conn.cursor().execute(f"SET @fileset_time_last = {int(time.time())}")
+ conn.cursor().execute("SET @fileset_time_last = %s", (int(time.time()),))
with conn.cursor() as cursor:
cursor.execute("SELECT MAX(`transaction`) FROM transactions")
@@ -611,7 +616,8 @@ def db_insert(data_arr, username=None, skiplog=False):
try:
cur.execute(
- f"SELECT COUNT(fileset) from transactions WHERE `transaction` = {transaction_id}"
+ "SELECT COUNT(fileset) from transactions WHERE `transaction` = %s",
+ (transaction_id,),
)
fileset_insertion_count = cur.fetchone()["COUNT(fileset)"]
category_text = f"Uploaded from {src}"
@@ -627,11 +633,13 @@ def db_insert(data_arr, username=None, skiplog=False):
def compare_filesets(id1, id2, conn):
with conn.cursor() as cursor:
cursor.execute(
- f"SELECT name, size, `size-r`, `size-rd`, checksum FROM file WHERE fileset = '{id1}'"
+ "SELECT name, size, `size-r`, `size-rd`, checksum FROM file WHERE fileset = %s",
+ (id1,),
)
fileset1 = cursor.fetchall()
cursor.execute(
- f"SELECT name, size, `size-r`, `size-rd`, checksum FROM file WHERE fileset = '{id2}'"
+ "SELECT name, size, `size-r`, `size-rd`, checksum FROM file WHERE fileset = %s",
+ (id2,),
)
fileset2 = cursor.fetchall()
@@ -665,9 +673,9 @@ def find_matching_game(game_files):
for file in game_files:
checksum = file[1]
- query = f"SELECT file.fileset as file_fileset FROM filechecksum JOIN file ON filechecksum.file = file.id WHERE filechecksum.checksum = '{checksum}' AND file.detection = TRUE"
+ query = "SELECT file.fileset as file_fileset FROM filechecksum JOIN file ON filechecksum.file = file.id WHERE filechecksum.checksum = %s AND file.detection = TRUE"
with conn.cursor() as cursor:
- cursor.execute(query)
+ cursor.execute(query, (checksum,))
records = cursor.fetchall()
# If file is not part of detection entries, skip it
@@ -682,7 +690,8 @@ def find_matching_game(game_files):
for key, value in Counter(matching_filesets).items():
with conn.cursor() as cursor:
cursor.execute(
- f"SELECT COUNT(file.id) FROM file JOIN fileset ON file.fileset = fileset.id WHERE fileset.id = '{key}'"
+ "SELECT COUNT(file.id) FROM file JOIN fileset ON file.fileset = fileset.id WHERE fileset.id = %s",
+ (key,),
)
count_files_in_fileset = cursor.fetchone()["COUNT(file.id)"]
@@ -693,7 +702,8 @@ def find_matching_game(game_files):
with conn.cursor() as cursor:
cursor.execute(
- f"SELECT engineid, game.id, gameid, platform, language, `key`, src, fileset.id as fileset FROM game JOIN fileset ON fileset.game = game.id JOIN engine ON engine.id = game.engine WHERE fileset.id = '{key}'"
+ "SELECT engineid, game.id, gameid, platform, language, `key`, src, fileset.id as fileset FROM game JOIN fileset ON fileset.game = game.id JOIN engine ON engine.id = game.engine WHERE fileset.id = %s",
+ (key,),
)
records = cursor.fetchall()
@@ -717,7 +727,7 @@ def find_matching_game(game_files):
if compare_filesets(matching_games[0]["fileset"], game_files[0][0], conn):
with conn.cursor() as cursor:
cursor.execute(
- f"UPDATE fileset SET `delete` = TRUE WHERE id = {game_files[0][0]}"
+ "UPDATE fileset SET `delete` = TRUE WHERE id = %s", (game_files[0][0],)
)
return []
@@ -730,7 +740,8 @@ def merge_filesets(detection_id, dat_id):
try:
with conn.cursor() as cursor:
cursor.execute(
- f"SELECT DISTINCT(filechecksum.checksum), checksize, checktype FROM filechecksum JOIN file on file.id = filechecksum.file WHERE fileset = '{detection_id}'"
+ "SELECT DISTINCT(filechecksum.checksum), checksize, checktype FROM filechecksum JOIN file on file.id = filechecksum.file WHERE fileset = %s'",
+ (detection_id,),
)
detection_files = cursor.fetchall()
@@ -740,22 +751,26 @@ def merge_filesets(detection_id, dat_id):
checktype = file[2]
cursor.execute(
- f"DELETE FROM file WHERE checksum = '{checksum}' AND fileset = {detection_id} LIMIT 1"
+ "DELETE FROM file WHERE checksum = %s AND fileset = %s LIMIT 1",
+ (checksum, detection_id),
)
cursor.execute(
- f"UPDATE file JOIN filechecksum ON filechecksum.file = file.id SET detection = TRUE, checksize = {checksize}, checktype = '{checktype}' WHERE fileset = '{dat_id}' AND filechecksum.checksum = '{checksum}'"
+ "UPDATE file JOIN filechecksum ON filechecksum.file = file.id SET detection = TRUE, checksize = %s, checktype = %s WHERE fileset = %s AND filechecksum.checksum = %s",
+ (checksize, checktype, dat_id, checksum),
)
cursor.execute(
- f"INSERT INTO history (`timestamp`, fileset, oldfileset) VALUES (FROM_UNIXTIME({int(time.time())}), {dat_id}, {detection_id})"
+ "INSERT INTO history (`timestamp`, fileset, oldfileset) VALUES (FROM_UNIXTIME(%s), %s, %s)",
+ (int(time.time()), dat_id, detection_id),
)
cursor.execute("SELECT LAST_INSERT_ID()")
history_last = cursor.fetchone()["LAST_INSERT_ID()"]
cursor.execute(
- f"UPDATE history SET fileset = {dat_id} WHERE fileset = {detection_id}"
+ "UPDATE history SET fileset = %s WHERE fileset = %s",
+ (dat_id, detection_id),
)
- cursor.execute(f"DELETE FROM fileset WHERE id = {detection_id}")
+ cursor.execute("DELETE FROM fileset WHERE id = %s", (detection_id,))
conn.commit()
except Exception as e:
@@ -812,11 +827,13 @@ def populate_matching_games():
log_text = f"Matched game {matched_game['engineid']}:\n{matched_game['gameid']}-{matched_game['platform']}-{matched_game['language']}\nvariant {matched_game['key']}. State {status}. Fileset:{fileset[0][0]}."
# Updating the fileset.game value to be $matched_game["id"]
- query = f"UPDATE fileset SET game = {matched_game['id']}, status = '{status}', `key` = '{matched_game['key']}' WHERE id = {fileset[0][0]}"
+ query = "UPDATE fileset SET game = %s, status = %s, `key` = %s WHERE id = %s"
history_last = merge_filesets(matched_game["fileset"], fileset[0][0])
- if cursor.execute(query):
+ if cursor.execute(
+ query, (matched_game["id"], status, matched_game["key"], fileset[0][0])
+ ):
user = f"cli:{getpass.getuser()}"
create_log(
@@ -835,7 +852,7 @@ def populate_matching_games():
# Add log id to the history table
cursor.execute(
- f"UPDATE history SET log = {log_last} WHERE id = {history_last}"
+ "UPDATE history SET log = %s WHERE id = %s", (log_last, history_last)
)
try:
@@ -873,7 +890,7 @@ def match_fileset(data_arr, username=None, skiplog=False):
detection = src == "scummvm"
source_status = "detection" if detection else src
- conn.cursor().execute(f"SET @fileset_time_last = {int(time.time())}")
+ conn.cursor().execute("SET @fileset_time_last = %s", (int(time.time()),))
with conn.cursor() as cursor:
cursor.execute("SELECT MAX(`transaction`) FROM transactions")
@@ -1280,7 +1297,8 @@ def update_all_files(fileset, candidate_fileset_id, is_candidate_detection, conn
with conn.cursor() as cursor:
# Extracting the filename from the filepath.
cursor.execute(
- f"SELECT id, REGEXP_REPLACE(name, '^.*[\\\\/]', '') AS name, size FROM file WHERE fileset = {candidate_fileset_id}"
+ "SELECT id, REGEXP_REPLACE(name, '^.*[\\\\/]', '') AS name, size FROM file WHERE fileset = %s",
+ (candidate_fileset_id,),
)
target_files = cursor.fetchall()
candidate_files = {
@@ -2417,13 +2435,13 @@ def find_matching_filesets(fileset, conn, status):
checksize, checktype, checksum = get_checksum_props(
checktype, checksum
)
- query = f"""SELECT DISTINCT fs.id AS fileset_id
+ query = """SELECT DISTINCT fs.id AS fileset_id
FROM fileset fs
JOIN file f ON fs.id = f.fileset
JOIN filechecksum fc ON f.id = fc.file
- WHERE fc.checksum = '{checksum}' AND fc.checktype = '{checktype}'
- AND fs.status IN ({state})"""
- cursor.execute(query)
+ WHERE fc.checksum = %s AND fc.checktype = %s
+ AND fs.status IN (%s)"""
+ cursor.execute(query, (checksum, checktype, state))
records = cursor.fetchall()
if records:
for record in records:
@@ -2446,16 +2464,16 @@ def matching_set(fileset, conn):
checksum = checksum.split(":")[1]
size = file["size"]
- query = f"""
+ query = """
SELECT DISTINCT fs.id AS fileset_id
FROM fileset fs
JOIN file f ON fs.id = f.fileset
JOIN filechecksum fc ON f.id = fc.file
- WHERE fc.checksum = '{checksum}' AND fc.checktype LIKE 'md5%'
- AND fc.checksize > {size}
+ WHERE fc.checksum = %s AND fc.checktype LIKE 'md5%'
+ AND fc.checksize > %s
AND fs.status = 'detection'
"""
- cursor.execute(query)
+ cursor.execute(query, (checksum, size))
records = cursor.fetchall()
if records:
for record in records:
@@ -2485,11 +2503,12 @@ def handle_matched_filesets(
if is_full_matched:
break
cursor.execute(
- f"SELECT status FROM fileset WHERE id = {matched_fileset_id}"
+ "SELECT status FROM fileset WHERE id = %s", (matched_fileset_id,)
)
status = cursor.fetchone()["status"]
cursor.execute(
- f"SELECT COUNT(file.id) FROM file WHERE fileset = {matched_fileset_id}"
+ "SELECT COUNT(file.id) FROM file WHERE fileset = %s",
+ (matched_fileset_id,),
)
count = cursor.fetchone()["COUNT(file.id)"]
@@ -2535,28 +2554,31 @@ def handle_matched_filesets(
def delete_original_fileset(fileset_id, conn):
with conn.cursor() as cursor:
- cursor.execute(f"DELETE FROM file WHERE fileset = {fileset_id}")
- cursor.execute(f"DELETE FROM fileset WHERE id = {fileset_id}")
+ cursor.execute("DELETE FROM file WHERE fileset = %s", (fileset_id,))
+ cursor.execute("DELETE FROM fileset WHERE id = %s", (fileset_id,))
conn.commit()
def update_fileset_status(cursor, fileset_id, status):
- cursor.execute(f"""
+ cursor.execute(
+ """
UPDATE fileset SET
- status = '{status}',
- `timestamp` = FROM_UNIXTIME({int(time.time())})
- WHERE id = {fileset_id}
- """)
+ status = %s,
+ `timestamp` = FROM_UNIXTIME(%s)
+ WHERE id = %s
+ """,
+ (status, int(time.time()), fileset_id),
+ )
def populate_file(fileset, fileset_id, conn, detection):
with conn.cursor() as cursor:
- cursor.execute(f"SELECT * FROM file WHERE fileset = {fileset_id}")
+ cursor.execute("SELECT * FROM file WHERE fileset = %s", (fileset_id,))
target_files = cursor.fetchall()
target_files_dict = {}
for target_file in target_files:
cursor.execute(
- f"SELECT * FROM filechecksum WHERE file = {target_file['id']}"
+ "SELECT * FROM filechecksum WHERE file = %s", (target_file["id"],)
)
target_checksums = cursor.fetchall()
for checksum in target_checksums:
@@ -2681,7 +2703,8 @@ def set_populate_file(fileset, fileset_id, conn, detection):
with conn.cursor() as cursor:
# Extracting the filename from the filepath.
cursor.execute(
- f"SELECT id, REGEXP_REPLACE(name, '^.*[\\\\/]', '') AS name, size FROM file WHERE fileset = {fileset_id}"
+ "SELECT id, REGEXP_REPLACE(name, '^.*[\\\\/]', '') AS name, size FROM file WHERE fileset = %s",
+ (fileset_id,),
)
target_files = cursor.fetchall()
candidate_files = {
@@ -2723,23 +2746,17 @@ def set_populate_file(fileset, fileset_id, conn, detection):
):
name = normalised_path(file["name"])
values = [name]
-
values.append(file["size"] if "size" in file else "0")
values.append(file["size-r"] if "size-r" in file else "0")
values.append(file["size-rd"] if "size-rd" in file else "0")
-
values.extend([checksum, fileset_id, detection, "None"])
- placeholders = (
- ["%s"] * (len(values[:5])) + ["%s"] + ["%s"] * 2 + ["NOW()"]
- )
- query = f"INSERT INTO file ( name, size, `size-r`, `size-rd`, checksum, fileset, detection, detection_type, `timestamp` ) VALUES ({', '.join(placeholders)})"
+ query = "INSERT INTO file ( name, size, `size-r`, `size-rd`, checksum, fileset, detection, detection_type, `timestamp` ) VALUES ( %s, %s, %s, %s, %s, %s, %s, %s, NOW())"
cursor.execute(query, values)
cursor.execute("SET @file_last = LAST_INSERT_ID()")
cursor.execute("SELECT @file_last AS file_id")
- cursor.execute("SELECT @file_last AS file_id")
file_id = cursor.fetchone()["file_id"]
insert_filechecksum(file, "md5", file_id, conn)
@@ -2877,7 +2894,8 @@ def finalize_fileset_insertion(
):
with conn.cursor() as cursor:
cursor.execute(
- f"SELECT COUNT(fileset) from transactions WHERE `transaction` = {transaction_id}"
+ "SELECT COUNT(fileset) from transactions WHERE `transaction` = %s",
+ (transaction_id,),
)
fileset_insertion_count = cursor.fetchone()["COUNT(fileset)"]
category_text = f"Uploaded from {src}"
@@ -2915,7 +2933,7 @@ def user_integrity_check(data, ip, game_metadata=None):
print(f"Failed to connect to database: {e}")
return
- conn.cursor().execute(f"SET @fileset_time_last = {int(time.time())}")
+ conn.cursor().execute("SET @fileset_time_last = %s", (int(time.time()),))
try:
with conn.cursor() as cursor:
@@ -2940,12 +2958,13 @@ def user_integrity_check(data, ip, game_metadata=None):
missing_set = set()
for fileset_id in matched_map.keys():
- cursor.execute(f"SELECT * FROM file WHERE fileset = {fileset_id}")
+ cursor.execute("SELECT * FROM file WHERE fileset = %s", (fileset_id,))
target_files = cursor.fetchall()
target_files_dict = {}
for target_file in target_files:
cursor.execute(
- f"SELECT * FROM filechecksum WHERE file = {target_file['id']}"
+ "SELECT * FROM filechecksum WHERE file = %s",
+ (target_file["id"],),
)
target_checksums = cursor.fetchall()
for checksum in target_checksums:
@@ -3025,12 +3044,13 @@ def user_integrity_check(data, ip, game_metadata=None):
most_matched = matched_list[0]
matched_fileset_id, matched_count = most_matched[0], most_matched[1]
cursor.execute(
- f"SELECT status FROM fileset WHERE id = {matched_fileset_id}"
+ "SELECT status FROM fileset WHERE id = %s", (matched_fileset_id,)
)
status = cursor.fetchone()["status"]
cursor.execute(
- f"SELECT COUNT(file.id) FROM file WHERE fileset = {matched_fileset_id}"
+ "SELECT COUNT(file.id) FROM file WHERE fileset = %s",
+ (matched_fileset_id,),
)
count = cursor.fetchone()["COUNT(file.id)"]
if status == "full" and count == matched_count:
@@ -3068,13 +3088,14 @@ def user_integrity_check(data, ip, game_metadata=None):
def add_usercount(fileset, conn):
with conn.cursor() as cursor:
cursor.execute(
- f"UPDATE fileset SET user_count = COALESCE(user_count, 0) + 1 WHERE id = {fileset}"
+ "UPDATE fileset SET user_count = COALESCE(user_count, 0) + 1 WHERE id = %s",
+ (fileset,),
)
- cursor.execute(f"SELECT user_count from fileset WHERE id = {fileset}")
+ cursor.execute("SELECT user_count from fileset WHERE id = %s", (fileset,))
count = cursor.fetchone()["user_count"]
if count >= 3:
cursor.execute(
- f"UPDATE fileset SET status = 'ReadyForReview' WHERE id = {fileset}"
+ "UPDATE fileset SET status = 'ReadyForReview' WHERE id = %s", (fileset,)
)
Commit: 10dc79f415df66b1f4e5bf14212c78022fbf1472
https://github.com/scummvm/scummvm-sites/commit/10dc79f415df66b1f4e5bf14212c78022fbf1472
Author: ShivangNagta (shivangnag at gmail.com)
Date: 2025-07-18T22:15:19+02:00
Commit Message:
INTEGRITY: Parametrise sql queries in all files.
Changed paths:
clear.py
fileset.py
user_fileset_functions.py
diff --git a/clear.py b/clear.py
index 707914c..acdae14 100644
--- a/clear.py
+++ b/clear.py
@@ -7,26 +7,30 @@ import pymysql
import json
import os
+
def truncate_all_tables(conn):
+ # fmt: off
tables = ["filechecksum", "queue", "history", "transactions", "file", "fileset", "game", "engine", "log"]
cursor = conn.cursor()
-
+ # fmt: on
+
# Disable foreign key checks
cursor.execute("SET FOREIGN_KEY_CHECKS = 0")
-
+
for table in tables:
try:
- cursor.execute(f"TRUNCATE TABLE `{table}`")
+ cursor.execute("TRUNCATE TABLE %s", (table,))
print(f"Table '{table}' truncated successfully")
except pymysql.Error as err:
print(f"Error truncating table '{table}': {err}")
-
+
# Enable foreign key checks
cursor.execute("SET FOREIGN_KEY_CHECKS = 1")
+
if __name__ == "__main__":
base_dir = os.path.dirname(os.path.abspath(__file__))
- config_path = os.path.join(base_dir, 'mysql_config.json')
+ config_path = os.path.join(base_dir, "mysql_config.json")
with open(config_path) as f:
mysql_cred = json.load(f)
@@ -41,9 +45,9 @@ if __name__ == "__main__":
user=username,
password=password,
db=dbname, # Specify the database to use
- charset='utf8mb4',
+ charset="utf8mb4",
cursorclass=pymysql.cursors.DictCursor,
- autocommit=True
+ autocommit=True,
)
# Check connection
@@ -55,4 +59,4 @@ if __name__ == "__main__":
truncate_all_tables(conn)
# Close connection
- conn.close()
\ No newline at end of file
+ conn.close()
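One caveat with the TRUNCATE change above: MySQL placeholders bind values, not identifiers, so "TRUNCATE TABLE %s" ends up receiving a quoted string where a table name is expected. A possible alternative is an allow-list of known table names quoted with backticks; this is only a sketch, not code from the repository:

    ALLOWED_TABLES = {"filechecksum", "queue", "history", "transactions",
                      "file", "fileset", "game", "engine", "log"}

    def truncate_table(cursor, table):
        # Identifiers cannot be parameterised, so restrict the name to a fixed
        # set and interpolate it with backtick quoting instead of binding it.
        if table not in ALLOWED_TABLES:
            raise ValueError(f"unexpected table name: {table}")
        cursor.execute(f"TRUNCATE TABLE `{table}`")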
diff --git a/fileset.py b/fileset.py
index a930f54..a45556e 100644
--- a/fileset.py
+++ b/fileset.py
@@ -15,7 +15,6 @@ from user_fileset_functions import (
)
from pagination import create_page
import difflib
-from pymysql.converters import escape_string
from db_functions import (
find_matching_filesets,
get_all_related_filesets,
@@ -123,15 +122,18 @@ def fileset():
id = max(min_id, min(id, max_id))
# Check if the id exists in the fileset table
- cursor.execute(f"SELECT id FROM fileset WHERE id = {id}")
+ cursor.execute("SELECT id FROM fileset WHERE id = %s", (id,))
if cursor.rowcount == 0:
# If the id doesn't exist, get a new id from the history table
- cursor.execute(f"SELECT fileset FROM history WHERE oldfileset = {id}")
+ cursor.execute(
+ "SELECT fileset FROM history WHERE oldfileset = %s", (id,)
+ )
id = cursor.fetchone()["fileset"]
# Get the history for the current id
cursor.execute(
- f"SELECT `timestamp`, oldfileset, log FROM history WHERE fileset = {id} ORDER BY `timestamp`"
+ "SELECT `timestamp`, oldfileset, log FROM history WHERE fileset = %s ORDER BY `timestamp`",
+ (id,),
)
history = cursor.fetchall()
@@ -253,7 +255,8 @@ def fileset():
columns_to_select = "file.id, name, size, `size-r`, `size-rd`, checksum, detection, detection_type, `timestamp`, `modification-time`"
columns_to_select += ", ".join(md5_columns)
cursor.execute(
- f"SELECT file.id, name, size, `size-r`, `size-rd`, checksum, detection, detection_type, `timestamp`, `modification-time` FROM file WHERE fileset = {id} {order}"
+ f"SELECT file.id, name, size, `size-r`, `size-rd`, checksum, detection, detection_type, `timestamp`, `modification-time` FROM file WHERE fileset = %s {order}",
+ (id,),
)
result = cursor.fetchall()
@@ -263,7 +266,8 @@ def fileset():
if widetable == "full":
file_ids = [file["id"] for file in result]
cursor.execute(
- f"SELECT file, checksum, checksize, checktype FROM filechecksum WHERE file IN ({','.join(map(str, file_ids))})"
+ "SELECT file, checksum, checksize, checktype FROM filechecksum WHERE file IN (%s)",
+ (",".join(map(str, file_ids)),),
)
checksums = cursor.fetchall()
@@ -330,7 +334,8 @@ def fileset():
if "delete" in request.form:
cursor.execute(
- f"UPDATE fileset SET `delete` = TRUE WHERE id = {request.form['delete']}"
+ "UPDATE fileset SET `delete` = TRUE WHERE id = %s",
+ (request.form["delete"],),
)
connection.commit()
html += "<p id='delete-confirm'>Fileset marked for deletion</p>"
@@ -341,7 +346,8 @@ def fileset():
# Generate the HTML for the fileset history
cursor.execute(
- f"SELECT `timestamp`, category, `text`, id FROM log WHERE `text` REGEXP 'Fileset:{id}' ORDER BY `timestamp` DESC, id DESC"
+ "SELECT `timestamp`, category, `text`, id FROM log WHERE `text` REGEXP 'Fileset:%s' ORDER BY `timestamp` DESC, id DESC",
+ (id,),
)
# cursor.execute(f"SELECT `timestamp`, fileset, oldfileset FROM history WHERE fileset = {id} ORDER BY `timestamp` DESC")
@@ -357,14 +363,19 @@ def fileset():
related_filesets = get_all_related_filesets(id, connection)
cursor.execute(
- f"SELECT * FROM history WHERE fileset IN ({','.join(map(str, related_filesets))}) OR oldfileset IN ({','.join(map(str, related_filesets))})"
+ "SELECT * FROM history WHERE fileset IN (%s) OR oldfileset IN (%s)",
+ (
+ ",".join(map(str, related_filesets)),
+ ",".join(map(str, related_filesets)),
+ ),
)
history = cursor.fetchall()
print(f"History: {history}")
for h in history:
cursor.execute(
- f"SELECT `timestamp`, category, `text`, id FROM log WHERE `text` LIKE 'Fileset:{h['oldfileset']}' ORDER BY `timestamp` DESC, id DESC"
+ "SELECT `timestamp`, category, `text`, id FROM log WHERE `text` LIKE 'Fileset:%s' ORDER BY `timestamp` DESC, id DESC",
+ (h["oldfileset"],),
)
logs = cursor.fetchall()
print(f"Logs: {logs}")
@@ -378,7 +389,9 @@ def fileset():
html += f"<td>Created fileset <a href='fileset?id={h['fileset']}'>Fileset {h['fileset']}</a></td>\n"
# html += f"<td><a href='logs?id={h['log']}'>Log {h['log']}</a></td>\n"
if h["log"]:
- cursor.execute(f"SELECT `text` FROM log WHERE id = {h['log']}")
+ cursor.execute(
+ "SELECT `text` FROM log WHERE id = %s", (h["log"],)
+ )
log_text = cursor.fetchone()["text"]
log_text = convert_log_text_to_links(log_text)
html += f"<td><a href='logs?id={h['log']}'>Log {h['log']}</a>: {log_text}</td>\n"
@@ -393,7 +406,7 @@ def fileset():
html += f"<td><a href='fileset?id={h['oldfileset']}'>Fileset {h['oldfileset']}</a> merged into fileset <a href='fileset?id={h['fileset']}'>Fileset {h['fileset']}</a></td>\n"
# html += f"<td><a href='logs?id={h['log']}'>Log {h['log']}</a></td>\n"
if h["log"]:
- cursor.execute(f"SELECT `text` FROM log WHERE id = {h['log']}")
+ cursor.execute("SELECT `text` FROM log WHERE id = %s", (h["log"],))
log_text = cursor.fetchone()["text"]
log_text = convert_log_text_to_links(log_text)
html += f"<td><a href='logs?id={h['log']}'>Log {h['log']}</a>: {log_text}</td>\n"
@@ -425,21 +438,23 @@ def match_fileset_route(id):
try:
with connection.cursor() as cursor:
- cursor.execute(f"SELECT * FROM fileset WHERE id = {id}")
+ cursor.execute("SELECT * FROM fileset WHERE id = %s", (id,))
fileset = cursor.fetchone()
fileset["rom"] = []
if not fileset:
return f"No fileset found with id {id}", 404
cursor.execute(
- f"SELECT file.id, name, size, checksum, detection, detection_type FROM file WHERE fileset = {id}"
+ "SELECT file.id, name, size, checksum, detection, detection_type FROM file WHERE fileset = %s",
+ (id,),
)
result = cursor.fetchall()
file_ids = {}
for file in result:
file_ids[file["id"]] = (file["name"], file["size"])
cursor.execute(
- f"SELECT file, checksum, checksize, checktype FROM filechecksum WHERE file IN ({','.join(map(str, file_ids.keys()))})"
+ "SELECT file, checksum, checksize, checktype FROM filechecksum WHERE file IN (%s)",
+ (",".join(map(str, file_ids.keys())),),
)
files = cursor.fetchall()
@@ -491,7 +506,7 @@ def match_fileset_route(id):
if fileset_id == id:
continue
cursor.execute(
- f"SELECT COUNT(file.id) FROM file WHERE fileset = {fileset_id}"
+ "SELECT COUNT(file.id) FROM file WHERE fileset = %s", (fileset_id,)
)
count = cursor.fetchone()["COUNT(file.id)"]
html += f"""
@@ -717,7 +732,8 @@ def confirm_merge(id):
try:
with connection.cursor() as cursor:
- cursor.execute(f"""
+ cursor.execute(
+ """
SELECT
fs.*,
g.name AS game_name,
@@ -730,11 +746,14 @@ def confirm_merge(id):
LEFT JOIN
game g ON fs.game = g.id
WHERE
- fs.id = {id}
- """)
+ fs.id = %s
+ """,
+ (id,),
+ )
source_fileset = cursor.fetchone()
print(source_fileset)
- cursor.execute(f"""
+ cursor.execute(
+ """
SELECT
fs.*,
g.name AS game_name,
@@ -747,8 +766,10 @@ def confirm_merge(id):
LEFT JOIN
game g ON fs.game = g.id
WHERE
- fs.id = {target_id}
- """)
+ fs.id = %s
+ """,
+ (target_id,),
+ )
def highlight_differences(source, target):
diff = difflib.ndiff(source, target)
@@ -846,63 +867,101 @@ def execute_merge(id, source=None, target=None):
try:
with connection.cursor() as cursor:
- cursor.execute(f"SELECT * FROM fileset WHERE id = {source_id}")
+ cursor.execute("SELECT * FROM fileset WHERE id = %s", (source_id,))
source_fileset = cursor.fetchone()
- cursor.execute(f"SELECT * FROM fileset WHERE id = {target_id}")
+ cursor.execute("SELECT * FROM fileset WHERE id = %s", (target_id,))
if source_fileset["status"] == "detection":
- cursor.execute(f"""
+ cursor.execute(
+ """
UPDATE fileset SET
- game = '{source_fileset["game"]}',
- status = '{source_fileset["status"]}',
- `key` = '{source_fileset["key"]}',
- megakey = '{source_fileset["megakey"]}',
- `timestamp` = '{source_fileset["timestamp"]}'
- WHERE id = {target_id}
- """)
+ game = %s,
+ status = %s,
+ `key` = %s,
+ megakey = %s,
+ `timestamp` = %s
+ WHERE id = %s
+ """,
+ (
+ source_fileset["game"],
+ source_fileset["status"],
+ source_fileset["key"],
+ source_fileset["megakey"],
+ source_fileset["timestamp"],
+ target_id,
+ ),
+ )
- cursor.execute(f"DELETE FROM file WHERE fileset = {target_id}")
+ cursor.execute("DELETE FROM file WHERE fileset = %s", (target_id,))
- cursor.execute(f"SELECT * FROM file WHERE fileset = {source_id}")
+ cursor.execute("SELECT * FROM file WHERE fileset = %s", (source_id,))
source_files = cursor.fetchall()
for file in source_files:
- cursor.execute(f"""
+ cursor.execute(
+ """
INSERT INTO file (name, size, checksum, fileset, detection, `timestamp`)
- VALUES ('{escape_string(file["name"]).lower()}', '{file["size"]}', '{file["checksum"]}', {target_id}, {file["detection"]}, NOW())
- """)
+ VALUES (%s, %s, %s, %s, %s, NOW())
+ """,
+ (
+ file["name"].lower(),
+ file["size"],
+ file["checksum"],
+ target_id,
+ file["detection"],
+ ),
+ )
cursor.execute("SELECT LAST_INSERT_ID() as file_id")
new_file_id = cursor.fetchone()["file_id"]
cursor.execute(
- f"SELECT * FROM filechecksum WHERE file = {file['id']}"
+ "SELECT * FROM filechecksum WHERE file = %s", (file["id"],)
)
file_checksums = cursor.fetchall()
for checksum in file_checksums:
- cursor.execute(f"""
+ cursor.execute(
+ """
INSERT INTO filechecksum (file, checksize, checktype, checksum)
- VALUES ({new_file_id}, '{checksum["checksize"]}', '{checksum["checktype"]}', '{checksum["checksum"]}')
- """)
+ VALUES (%s, %s, %s, %s)
+ """,
+ (
+ new_file_id,
+ checksum["checksize"],
+ checksum["checktype"],
+ checksum["checksum"],
+ ),
+ )
elif source_fileset["status"] in ["scan", "dat"]:
- cursor.execute(f"""
+ cursor.execute(
+ """
UPDATE fileset SET
- status = '{source_fileset["status"] if source_fileset["status"] != "dat" else "partial"}',
- `key` = '{source_fileset["key"]}',
- `timestamp` = '{source_fileset["timestamp"]}'
- WHERE id = {target_id}
- """)
- cursor.execute(f"SELECT * FROM file WHERE fileset = {source_id}")
+ status = %s,
+ `key` = %s,
+ `timestamp` = %s
+ WHERE id = %s
+ """,
+ (
+ source_fileset["status"]
+ if source_fileset["status"] != "dat"
+ else "partial",
+ source_fileset["key"],
+ source_fileset["timestamp"],
+ target_id,
+ ),
+ )
+ cursor.execute("SELECT * FROM file WHERE fileset = %s", (source_id,))
source_files = cursor.fetchall()
- cursor.execute(f"SELECT * FROM file WHERE fileset = {target_id}")
+ cursor.execute("SELECT * FROM file WHERE fileset = %s", (target_id,))
target_files = cursor.fetchall()
target_files_dict = {}
for target_file in target_files:
cursor.execute(
- f"SELECT * FROM filechecksum WHERE file = {target_file['id']}"
+ "SELECT * FROM filechecksum WHERE file = %s",
+ (target_file["id"],),
)
target_checksums = cursor.fetchall()
for checksum in target_checksums:
@@ -910,7 +969,8 @@ def execute_merge(id, source=None, target=None):
for source_file in source_files:
cursor.execute(
- f"SELECT * FROM filechecksum WHERE file = {source_file['id']}"
+ "SELECT * FROM filechecksum WHERE file = %s",
+ (source_file["id"],),
)
source_checksums = cursor.fetchall()
file_exists = False
@@ -921,13 +981,22 @@ def execute_merge(id, source=None, target=None):
source_file["detection"] = target_file["detection"]
cursor.execute(
- f"DELETE FROM file WHERE id = {target_file['id']}"
+ "DELETE FROM file WHERE id = %s", (target_file["id"],)
)
file_exists = True
break
print(file_exists)
- cursor.execute(f"""INSERT INTO file (name, size, checksum, fileset, detection, `timestamp`) VALUES (
- '{source_file["name"]}', '{source_file["size"]}', '{source_file["checksum"]}', {target_id}, {source_file["detection"]}, NOW())""")
+ cursor.execute(
+ """INSERT INTO file (name, size, checksum, fileset, detection, `timestamp`) VALUES (
+ %s, %s, %s, %s, %s, NOW())""",
+ (
+ source_file["name"],
+ source_file["size"],
+ source_file["checksum"],
+ target_id,
+ source_file["detection"],
+ ),
+ )
new_file_id = cursor.lastrowid
for checksum in source_checksums:
# TODO: Handle the string
@@ -942,10 +1011,13 @@ def execute_merge(id, source=None, target=None):
),
)
- cursor.execute(f"""
+ cursor.execute(
+ """
INSERT INTO history (`timestamp`, fileset, oldfileset)
- VALUES (NOW(), {target_id}, {source_id})
- """)
+ VALUES (NOW(), %s, %s)
+ """,
+ (target_id, source_id),
+ )
connection.commit()
@@ -960,8 +1032,8 @@ def mark_as_full(id):
try:
conn = db_connect()
with conn.cursor() as cursor:
- update_query = f"UPDATE fileset SET status = 'full' WHERE id = {id}"
- cursor.execute(update_query)
+ update_query = "UPDATE fileset SET status = 'full' WHERE id = %s"
+ cursor.execute(update_query, (id,))
create_log("Manual from Web", "Dev", f"Marked Fileset:{id} as full", conn)
conn.commit()
except Exception as e:
@@ -1181,8 +1253,10 @@ def delete_files(id):
connection = db_connect()
with connection.cursor() as cursor:
# SQL statements to delete related records
- cursor.execute(f"DELETE FROM filechecksum WHERE file IN ({ids_to_delete})")
- cursor.execute(f"DELETE FROM file WHERE id IN ({ids_to_delete})")
+ cursor.execute(
+ "DELETE FROM filechecksum WHERE file IN (%s)", (ids_to_delete,)
+ )
+ cursor.execute("DELETE FROM file WHERE id IN (%s)", (ids_to_delete,))
# Commit the deletions
connection.commit()
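Note on the IN (%s) form above: with a single placeholder, the comma-joined ids are bound as one string literal. When several ids need to match, one placeholder per id can be generated instead; a sketch under that assumption (the ids and the conn object are hypothetical):

    # Assumes conn is an open pymysql connection, as elsewhere in this repo.
    file_ids = [101, 102, 103]  # hypothetical ids
    placeholders = ", ".join(["%s"] * len(file_ids))
    query = f"DELETE FROM filechecksum WHERE file IN ({placeholders})"
    with conn.cursor() as cursor:
        cursor.execute(query, file_ids)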
diff --git a/user_fileset_functions.py b/user_fileset_functions.py
index 91ad3d9..6ca1c1f 100644
--- a/user_fileset_functions.py
+++ b/user_fileset_functions.py
@@ -39,10 +39,10 @@ def file_json_to_array(file_json_object):
def user_insert_queue(user_fileset, conn):
- query = f"INSERT INTO queue (time, notes, fileset, ticketid, userid, commit) VALUES ({int(time.time())}, NULL, @fileset_last, NULL, NULL, NULL)"
+ query = "INSERT INTO queue (time, notes, fileset, ticketid, userid, commit) VALUES (%s, NULL, @fileset_last, NULL, NULL, NULL)"
with conn.cursor() as cursor:
- cursor.execute(query)
+ cursor.execute(query, (int(time.time()),))
conn.commit()
@@ -55,7 +55,7 @@ def user_insert_fileset(user_fileset, ip, conn):
cursor.execute("SELECT MAX(`transaction`) FROM transactions")
transaction_id = cursor.fetchone()["MAX(`transaction`)"] + 1
log_text = "from user submitted files"
- cursor.execute("SET @fileset_time_last = %s", (int(time.time())))
+ cursor.execute("SET @fileset_time_last = %s", (int(time.time()),))
if insert_fileset(
src, detection, key, megakey, transaction_id, log_text, conn, ip
):
@@ -79,7 +79,8 @@ def match_and_merge_user_filesets(id):
with conn.cursor() as cursor:
cursor.execute(
- f"SELECT fileset.id, filechecksum.checksum, src, status FROM fileset JOIN file ON file.fileset = fileset.id JOIN filechecksum ON file.id = filechecksum.file WHERE status = 'user' AND fileset.id = {id}"
+ "SELECT fileset.id, filechecksum.checksum, src, status FROM fileset JOIN file ON file.fileset = fileset.id JOIN filechecksum ON file.id = filechecksum.file WHERE status = 'user' AND fileset.id = %s",
+ (id,),
)
unmatched_files = cursor.fetchall()
@@ -110,11 +111,13 @@ def match_and_merge_user_filesets(id):
log_text = f"Matched game {matched_game['engineid']}:\n{matched_game['gameid']}-{matched_game['platform']}-{matched_game['language']}\nvariant {matched_game['key']}. State {status}. Fileset:{fileset[0][0]}."
# Updating the fileset.game value to be $matched_game["id"]
- query = f"UPDATE fileset SET game = {matched_game['id']}, status = '{status}', `key` = '{matched_game['key']}' WHERE id = {fileset[0][0]}"
+ query = "UPDATE fileset SET game = %s, status = %s, `key` = %s WHERE id = %s"
history_last = merge_filesets(matched_game["fileset"], fileset[0][0])
- if cursor.execute(query):
+ if cursor.execute(
+ query, (matched_game["id"], status, matched_game["key"], fileset[0][0])
+ ):
user = f"cli:{getpass.getuser()}"
# Merge log
@@ -136,7 +139,7 @@ def match_and_merge_user_filesets(id):
# Add log id to the history table
cursor.execute(
- f"UPDATE history SET log = {log_last} WHERE id = {history_last}"
+ "UPDATE history SET log = %s WHERE id = %s", (log_last, history_last)
)
if not conn.commit():
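The (int(time.time()),) change above is needed because a single parameter must still be passed as a tuple; parentheses alone do not create one. A quick illustration:

    t = 1700000000  # hypothetical timestamp
    assert not isinstance((t), tuple)   # parentheses alone: still an int
    assert isinstance((t,), tuple)      # the trailing comma makes the 1-tuple
    # cursor.execute("SET @fileset_time_last = %s", (t,))  # form used above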
Commit: 3ac672af9cc0e2c2cab6cb25f2256a94111bd814
https://github.com/scummvm/scummvm-sites/commit/3ac672af9cc0e2c2cab6cb25f2256a94111bd814
Author: ShivangNagta (shivangnag at gmail.com)
Date: 2025-07-18T22:15:19+02:00
Commit Message:
INTEGRITY: Run ruff formatter on compute_hash.py.
Changed paths:
compute_hash.py
diff --git a/compute_hash.py b/compute_hash.py
index d63b22f..ba8c62f 100644
--- a/compute_hash.py
+++ b/compute_hash.py
@@ -7,6 +7,7 @@ from enum import Enum
from datetime import datetime, date, timedelta
from collections import defaultdict
+
class FileType(Enum):
NON_MAC = "non_mac"
MAC_BINARY = "macbinary"
@@ -16,10 +17,12 @@ class FileType(Enum):
RAW_RSRC = "raw_rsrc"
ACTUAL_FORK_MAC = "actual_fork_mac"
+
script_version = "0.1"
SPECIAL_SYMBOLS = '/":*|\\?%<>\x7f'
+# fmt: off
# CRC table
CRC16_XMODEM_TABLE = [
0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
@@ -62,11 +65,14 @@ def crc16xmodem(data, crc=0):
crc = ((crc << 8) & 0xff00) ^ CRC16_XMODEM_TABLE[(
(crc >> 8) & 0xff) ^ byte]
return crc & 0xffff
+# fmt: on
+
def filesize(filepath):
- """ Returns size of file """
+ """Returns size of file"""
return os.stat(filepath).st_size
+
def get_dirs_at_depth(directory, depth):
directory = directory.rstrip(os.path.sep)
assert os.path.isdir(directory)
@@ -78,7 +84,7 @@ def get_dirs_at_depth(directory, depth):
yield root
-def my_escape_string(s: str) -> str:
+def escape_string(s: str) -> str:
"""
Escape strings
@@ -105,7 +111,7 @@ def encode_punycode(orig):
- escape special characters and
- ensure filenames can't end in a space or dotif temp == None:
"""
- s = my_escape_string(orig)
+ s = escape_string(orig)
encoded = s.encode("punycode").decode("ascii")
# punyencoding adds an '-' at the end when there are no special chars
# don't use it for comparing
@@ -132,52 +138,61 @@ def punycode_need_encode(orig):
return True
return False
+
def encode_path_components(filepath):
"""
Puny encodes all separate components of filepath
"""
- parts = [i for i in filepath.split(os.sep) if i ]
- encoded_parts = [encode_punycode(p) if punycode_need_encode(p) else p for p in parts]
+ parts = [i for i in filepath.split(os.sep) if i]
+ encoded_parts = [
+ encode_punycode(p) if punycode_need_encode(p) else p for p in parts
+ ]
return os.path.join(*encoded_parts)
+
def read_be_32(byte_stream, signed=False):
- """ Return unsigned integer of size_in_bits, assuming the data is big-endian """
+ """Return unsigned integer of size_in_bits, assuming the data is big-endian"""
format = ">i" if signed else ">I"
- (uint,) = struct.unpack(format, byte_stream[:32//8])
+ (uint,) = struct.unpack(format, byte_stream[: 32 // 8])
return uint
+
def read_be_16(byte_stream):
- """ Return unsigned integer of size_in_bits, assuming the data is big-endian """
- (uint,) = struct.unpack(">H", byte_stream[:16//8])
+ """Return unsigned integer of size_in_bits, assuming the data is big-endian"""
+ (uint,) = struct.unpack(">H", byte_stream[: 16 // 8])
return uint
+
def is_raw_rsrc(filepath):
- """ Returns boolean, checking if the given .rsrc file is a raw .rsrc file and not appledouble."""
+ """Returns boolean, checking if the given .rsrc file is a raw .rsrc file and not appledouble."""
filename = os.path.basename(filepath)
if filename.endswith(".rsrc"):
with open(filepath, "rb") as f:
return not is_appledouble(f.read())
return False
+
def is_appledouble_rsrc(filepath):
- """ Returns boolean, checking whether the given .rsrc file is an appledouble or not."""
+ """Returns boolean, checking whether the given .rsrc file is an appledouble or not."""
filename = os.path.basename(filepath)
if filename.endswith(".rsrc"):
with open(filepath, "rb") as f:
return is_appledouble(f.read())
return False
+
def is_appledouble_in_dot_(filepath):
- """ Returns boolean, checking whether the given ._ file is an appledouble or not. It also checks that the parent directory is not __MACOSX as that case is handled differently """
+ """Returns boolean, checking whether the given ._ file is an appledouble or not. It also checks that the parent directory is not __MACOSX as that case is handled differently"""
filename = os.path.basename(filepath)
parent_dir = os.path.basename(os.path.dirname(filepath))
if filename.startswith("._") and parent_dir != "__MACOSX":
with open(filepath, "rb") as f:
return is_appledouble(f.read())
return False
-
+
+
def is_appledouble_in_macosx(filepath):
- """ Returns boolean, checking whether the given ._ file in __MACOSX folder is an appledouble or not. """
+ """Returns boolean, checking whether the given ._ file in __MACOSX folder is an appledouble or not."""
filename = os.path.basename(filepath)
parent_dir = os.path.basename(os.path.dirname(filepath))
if filename.startswith("._") and parent_dir == "__MACOSX":
@@ -185,6 +200,7 @@ def is_appledouble_in_macosx(filepath):
return is_appledouble(f.read())
return False
+
def is_macbin(filepath):
with open(filepath, "rb") as file:
header = file.read(128)
@@ -195,7 +211,12 @@ def is_macbin(filepath):
# Preliminary check
# Exclude files that have zero name len, zero data fork, zero name fork and zero type_creator.
- if not header[1] and not read_be_32(header[83:]) and not read_be_32(header[87:]) and not read_be_32(header[69:]):
+ if (
+ not header[1]
+ and not read_be_32(header[83:])
+ and not read_be_32(header[87:])
+ and not read_be_32(header[69:])
+ ):
return False
checksum = crc16xmodem(header[:124])
@@ -207,10 +228,10 @@ def is_macbin(filepath):
datalen = read_be_32(header[83:])
rsrclen = read_be_32(header[87:])
- datalen_pad = (((datalen + 127) >> 7) << 7)
+ datalen_pad = ((datalen + 127) >> 7) << 7
# Length check
- if (128 + datalen_pad + rsrclen <= filesize(filepath)):
+ if 128 + datalen_pad + rsrclen <= filesize(filepath):
res_fork_offset = 128 + datalen_pad
if res_fork_offset < 0:
@@ -218,12 +239,14 @@ def is_macbin(filepath):
return True
+
def is_actual_resource_fork_mac(filepath):
- """ Returns boolean, checking the actual mac fork if it exists. """
+ """Returns boolean, checking the actual mac fork if it exists."""
resource_fork_path = os.path.join(filepath, "..namedfork", "rsrc")
return os.path.exists(resource_fork_path)
+
def is_appledouble(file_byte_stream):
"""
Appledouble Structure -
@@ -239,13 +262,14 @@ def is_appledouble(file_byte_stream):
+$04 / 4: offset to data from start of file
+$08 / 4: length of entry in bytes; may be zero
"""
- if (not file_byte_stream or read_be_32(file_byte_stream) != 0x00051607):
+ if not file_byte_stream or read_be_32(file_byte_stream) != 0x00051607:
return False
return True
+
def macbin_get_resfork_data(file_byte_stream):
- """ Returns the resource fork's data section as bytes, data fork size (size), resource fork size (size-r) and data section of resource fork size (size-rd) of a macbinary file """
+ """Returns the resource fork's data section as bytes, data fork size (size), resource fork size (size-r) and data section of resource fork size (size-rd) of a macbinary file"""
if not file_byte_stream:
return file_byte_stream
@@ -255,38 +279,57 @@ def macbin_get_resfork_data(file_byte_stream):
(rsrclen,) = struct.unpack(">I", file_byte_stream[0x57:0x5B])
resoure_fork_offset = 128 + datalen_padded
- rd_offset = int.from_bytes(file_byte_stream[resoure_fork_offset+0 : resoure_fork_offset+4])
- rd_length = int.from_bytes(file_byte_stream[resoure_fork_offset+8 : resoure_fork_offset+12])
+ rd_offset = int.from_bytes(
+ file_byte_stream[resoure_fork_offset + 0 : resoure_fork_offset + 4]
+ )
+ rd_length = int.from_bytes(
+ file_byte_stream[resoure_fork_offset + 8 : resoure_fork_offset + 12]
+ )
+
+ return (
+ file_byte_stream[
+ resoure_fork_offset + rd_offset : resoure_fork_offset
+ + rd_offset
+ + rd_length
+ ],
+ datalen,
+ rsrclen,
+ rd_length,
+ )
- return (file_byte_stream[resoure_fork_offset + rd_offset: resoure_fork_offset + rd_offset + rd_length], datalen, rsrclen, rd_length)
def macbin_get_datafork(file_byte_stream):
if not file_byte_stream:
return file_byte_stream
(datalen,) = struct.unpack(">I", file_byte_stream[0x53:0x57])
- return file_byte_stream[0x80: 0x80 + datalen]
+ return file_byte_stream[0x80 : 0x80 + datalen]
def appledouble_get_resfork_data(file_byte_stream):
- """ Returns the resource fork's data section as bytes, size of resource fork (size-r) and size of data section of resource fork (size-rd) of an appledouble file"""
-
+ """Returns the resource fork's data section as bytes, size of resource fork (size-r) and size of data section of resource fork (size-rd) of an appledouble file"""
+
entry_count = read_be_16(file_byte_stream[24:])
for entry in range(entry_count):
- start_index = 26 + entry*12
+ start_index = 26 + entry * 12
id = read_be_32(file_byte_stream[start_index:])
- offset = read_be_32(file_byte_stream[start_index+4:])
- length = read_be_32(file_byte_stream[start_index+8:])
+ offset = read_be_32(file_byte_stream[start_index + 4 :])
+ length = read_be_32(file_byte_stream[start_index + 8 :])
if id == 2:
- resource_fork_stream = file_byte_stream[offset:offset+length]
+ resource_fork_stream = file_byte_stream[offset : offset + length]
rd_offset = int.from_bytes(resource_fork_stream[0:4])
rd_length = int.from_bytes(resource_fork_stream[8:12])
- return (resource_fork_stream[rd_offset: rd_offset+rd_length], length, rd_length)
+ return (
+ resource_fork_stream[rd_offset : rd_offset + rd_length],
+ length,
+ rd_length,
+ )
+
def appledouble_get_datafork(filepath, fileinfo):
- """ Returns data fork's content as bytes and size of data fork of an appledouble file."""
+ """Returns data fork's content as bytes and size of data fork of an appledouble file."""
try:
index = filepath.index("__MACOSX")
except ValueError:
@@ -294,7 +337,7 @@ def appledouble_get_datafork(filepath, fileinfo):
if index is not None:
# Remove '__MACOSX/' from filepath
- filepath = filepath[:index] + filepath[index+8+1:]
+ filepath = filepath[:index] + filepath[index + 8 + 1 :]
parent_filepath = os.path.dirname(filepath)
data_fork_path = os.path.join(parent_filepath, fileinfo[1])
@@ -303,38 +346,46 @@ def appledouble_get_datafork(filepath, fileinfo):
data = f.read()
return (data, len(data))
except (FileNotFoundError, IsADirectoryError):
- return b''
+ return b""
+
def raw_rsrc_get_datafork(filepath):
- """ Returns the data fork's content as bytes and size of the data fork corresponding to raw rsrc file. """
+ """Returns the data fork's content as bytes and size of the data fork corresponding to raw rsrc file."""
try:
- with open(filepath[:-5]+".data", "rb") as f:
+ with open(filepath[:-5] + ".data", "rb") as f:
data = f.read()
return (data, len(data))
except (FileNotFoundError, IsADirectoryError):
- return b''
+ return b""
+
def raw_rsrc_get_resource_fork_data(filepath):
- """ Returns the resource fork's data section as bytes, size of resource fork (size-r) and size of data section of resource fork (size-rd) of a raw rsrc file."""
+ """Returns the resource fork's data section as bytes, size of resource fork (size-r) and size of data section of resource fork (size-rd) of a raw rsrc file."""
with open(filepath, "rb") as f:
resource_fork_stream = f.read()
resource_fork_len = len(resource_fork_stream)
rd_offset = int.from_bytes(resource_fork_stream[0:4])
rd_length = int.from_bytes(resource_fork_stream[8:12])
- return (resource_fork_stream[rd_offset: rd_offset+rd_length], resource_fork_len, rd_length)
+ return (
+ resource_fork_stream[rd_offset : rd_offset + rd_length],
+ resource_fork_len,
+ rd_length,
+ )
+
def actual_mac_fork_get_data_fork(filepath):
- """ Returns the data fork's content as bytes and its size if the actual mac fork exists """
+ """Returns the data fork's content as bytes and its size if the actual mac fork exists"""
try:
with open(filepath, "rb") as f:
data = f.read()
return (data, len(data))
except (FileNotFoundError, IsADirectoryError):
- return b''
+ return b""
+
def actual_mac_fork_get_resource_fork_data(filepath):
- """ Returns the resource fork's data section as bytes, size of resource fork (size-r) and size of data section of resource fork (size-rd) of the actual mac fork."""
+ """Returns the resource fork's data section as bytes, size of resource fork (size-r) and size of data section of resource fork (size-rd) of the actual mac fork."""
resource_fork_path = os.path.join(filepath, "..namedfork", "rsrc")
with open(resource_fork_path, "rb") as f:
resource_fork_stream = f.read()
@@ -342,17 +393,31 @@ def actual_mac_fork_get_resource_fork_data(filepath):
rd_offset = int.from_bytes(resource_fork_stream[0:4])
rd_length = int.from_bytes(resource_fork_stream[8:12])
- return (resource_fork_stream[rd_offset: rd_offset+rd_length], resource_fork_len, rd_length)
+ return (
+ resource_fork_stream[rd_offset : rd_offset + rd_length],
+ resource_fork_len,
+ rd_length,
+ )
+
def file_checksum(filepath, alg, custom_checksum_size, file_info):
with open(filepath, "rb") as f:
if file_info[0] == FileType.NON_MAC:
- return (create_checksum_pairs(checksum(f, alg, custom_checksum_size, filepath), alg, custom_checksum_size), filesize(filepath), 0, 0)
-
+ return (
+ create_checksum_pairs(
+ checksum(f, alg, custom_checksum_size, filepath),
+ alg,
+ custom_checksum_size,
+ ),
+ filesize(filepath),
+ 0,
+ 0,
+ )
+
# Processing mac files
res = []
- resfork = b''
- datafork = b''
+ resfork = b""
+ datafork = b""
file_data = f.read()
size = 0
@@ -362,27 +427,34 @@ def file_checksum(filepath, alg, custom_checksum_size, file_info):
if file_info[0] == FileType.MAC_BINARY:
(resfork, size, size_r, size_rd) = macbin_get_resfork_data(file_data)
datafork = macbin_get_datafork(file_data)
- elif file_info[0] in {FileType.APPLE_DOUBLE_DOT_, FileType.APPLE_DOUBLE_RSRC, FileType.APPLE_DOUBLE_MACOSX}:
+ elif file_info[0] in {
+ FileType.APPLE_DOUBLE_DOT_,
+ FileType.APPLE_DOUBLE_RSRC,
+ FileType.APPLE_DOUBLE_MACOSX,
+ }:
(resfork, size_r, size_rd) = appledouble_get_resfork_data(file_data)
(datafork, size) = appledouble_get_datafork(filepath, file_info)
elif file_info[0] == FileType.RAW_RSRC:
(resfork, size_r, size_rd) = raw_rsrc_get_resource_fork_data(filepath)
datafork, size = raw_rsrc_get_datafork(filepath)
elif file_info[0] == FileType.ACTUAL_FORK_MAC:
- (resfork, size_r, size_rd) = actual_mac_fork_get_resource_fork_data(filepath)
+ (resfork, size_r, size_rd) = actual_mac_fork_get_resource_fork_data(
+ filepath
+ )
(datafork, size) = actual_mac_fork_get_data_fork(filepath)
hashes = checksum(resfork, alg, custom_checksum_size, filepath)
- prefix = 'r'
+ prefix = "r"
if len(resfork):
res.extend(create_checksum_pairs(hashes, alg, custom_checksum_size, prefix))
hashes = checksum(datafork, alg, custom_checksum_size, filepath)
- prefix = 'd'
+ prefix = "d"
res.extend(create_checksum_pairs(hashes, alg, custom_checksum_size, prefix))
return (res, size, size_r, size_rd)
+
def create_checksum_pairs(hashes, alg, size, prefix=None):
res = []
@@ -392,23 +464,24 @@ def create_checksum_pairs(hashes, alg, size, prefix=None):
keys.append(f"{alg}-{size}")
if prefix:
for i, key in enumerate(keys):
- key_split = key.split('-')
+ key_split = key.split("-")
# If key is of the form "md5-t-5000"
- if (len(key_split) == 3):
+ if len(key_split) == 3:
key_split[1] = f"{prefix}{key_split[1]}"
else:
key_split.insert(1, prefix)
- keys[i] = '-'.join(key_split)
+ keys[i] = "-".join(key_split)
for i, h in enumerate(hashes):
res.append((keys[i], h))
return res
+
def checksum(file, alg, size, filepath):
- """ Returns checksum value of file buffer using a specific algoritm """
+ """Returns checksum value of file buffer using a specific algoritm"""
# Will contain 5 elements:
# - Full size checksum
# - Checksum of first 5000B
@@ -458,7 +531,7 @@ def checksum(file, alg, size, filepath):
hashes[0].update(bytes_stream)
hashes[1].update(bytes_stream[:5000])
- hashes[2].update(bytes_stream[:1024 * 1024])
+ hashes[2].update(bytes_stream[: 1024 * 1024])
if len(bytes_stream) >= 5000:
hashes[3].update(bytes_stream[-5000:])
else:
@@ -473,27 +546,29 @@ def checksum(file, alg, size, filepath):
hashes = [h.hexdigest() for h in hashes if h]
return hashes
+
def extract_macbin_filename_from_header(file):
- """ Extracts the filename from the header of the macbinary. """
+ """Extracts the filename from the header of the macbinary."""
with open(file, "rb") as f:
header = f.read(128)
name_len = header[1]
- filename_bytes = header[2:2+name_len]
+ filename_bytes = header[2 : 2 + name_len]
return filename_bytes.decode("utf-8")
+
def file_classification(filepath):
- """ Returns [ Filetype, Filename ]. Filetype is an enum value - NON_MAC, MAC_BINARY, APPLE_DOUBLE_RSRC, APPLE_DOUBLE_MACOSX, APPLE_DOUBLE_DOT_, RAW_RSRC
- Filename for a normal file is the same as the original. Extensions are dropped for macfiles. """
+ """Returns [ Filetype, Filename ]. Filetype is an enum value - NON_MAC, MAC_BINARY, APPLE_DOUBLE_RSRC, APPLE_DOUBLE_MACOSX, APPLE_DOUBLE_DOT_, RAW_RSRC
+ Filename for a normal file is the same as the original. Extensions are dropped for macfiles."""
# 1. Macbinary
if is_macbin(filepath):
- return [FileType.MAC_BINARY, extract_macbin_filename_from_header(filepath)]
-
+ return [FileType.MAC_BINARY, extract_macbin_filename_from_header(filepath)]
+
# 2. Appledouble .rsrc
if is_appledouble_rsrc(filepath):
base_name, _ = os.path.splitext(os.path.basename(filepath))
return [FileType.APPLE_DOUBLE_RSRC, base_name]
-
+
# 3. Raw .rsrc
if is_raw_rsrc(filepath):
base_name, _ = os.path.splitext(os.path.basename(filepath))
@@ -510,59 +585,65 @@ def file_classification(filepath):
filename = os.path.basename(filepath)
actual_filename = filename[2:]
return [FileType.APPLE_DOUBLE_MACOSX, actual_filename]
-
+
# 6. Actual resource fork of mac
if is_actual_resource_fork_mac(filepath):
filename = os.path.basename(filepath)
return [FileType.ACTUAL_FORK_MAC, filename]
-
+
# Normal file
else:
return [FileType.NON_MAC, os.path.basename(filepath)]
+
def file_filter(files):
- """ Removes extra macfiles from the given dictionary of files that are not needed for fork calculation.
- This avoids extra checksum calculation of these mac files in form of non-mac files """
-
+ """Removes extra macfiles from the given dictionary of files that are not needed for fork calculation.
+ This avoids extra checksum calculation of these mac files in form of non-mac files"""
+
to_be_deleted = []
-
+
for filepath, file_info in files.items():
- # For filename.rsrc (apple double rsrc), corresponding filename file (data fork) will be removed from the files dictionary
- if (file_info[0] == FileType.APPLE_DOUBLE_RSRC):
+ # For filename.rsrc (apple double rsrc), corresponding filename file (data fork) will be removed from the files dictionary
+ if file_info[0] == FileType.APPLE_DOUBLE_RSRC:
parent_dir_path = os.path.dirname(filepath)
expected_data_fork_path = os.path.join(parent_dir_path, file_info[1])
- if (expected_data_fork_path in files):
+ if expected_data_fork_path in files:
to_be_deleted.append(expected_data_fork_path)
- # For ._filename, corresponding filename file (data fork) will be removed from the files dictionary
- elif (file_info[0] == FileType.APPLE_DOUBLE_DOT_):
+ # For ._filename, corresponding filename file (data fork) will be removed from the files dictionary
+ elif file_info[0] == FileType.APPLE_DOUBLE_DOT_:
parent_dir_path = os.path.dirname(filepath)
expected_data_fork_path = os.path.join(parent_dir_path, file_info[1])
- if (expected_data_fork_path in files):
- to_be_deleted.append(expected_data_fork_path)
+ if expected_data_fork_path in files:
+ to_be_deleted.append(expected_data_fork_path)
- # For ._filename, corresponding ../filename file (data fork) will be removed from the files dictionary
- elif (file_info[0] == FileType.APPLE_DOUBLE_MACOSX):
+ # For ._filename, corresponding ../filename file (data fork) will be removed from the files dictionary
+ elif file_info[0] == FileType.APPLE_DOUBLE_MACOSX:
grand_parent_dir_path = os.path.dirname(os.path.dirname(filepath))
expected_data_fork_path = os.path.join(grand_parent_dir_path, file_info[1])
- if (expected_data_fork_path in files):
+ if expected_data_fork_path in files:
to_be_deleted.append(expected_data_fork_path)
# For filename.rsrc (raw rsrc), corresponding filename.data file (data fork) and filename.finf file (finder info) will be removed from the files dictionary
- elif (file_info[0] == FileType.RAW_RSRC):
+ elif file_info[0] == FileType.RAW_RSRC:
parent_dir_path = os.path.dirname(filepath)
- expected_data_fork_path = os.path.join(parent_dir_path, file_info[1]) + ".data"
+ expected_data_fork_path = (
+ os.path.join(parent_dir_path, file_info[1]) + ".data"
+ )
expected_finf_path = os.path.join(parent_dir_path, file_info[1]) + ".finf"
- if (expected_data_fork_path in files):
+ if expected_data_fork_path in files:
to_be_deleted.append(expected_data_fork_path)
- if (expected_finf_path in files):
+ if expected_finf_path in files:
to_be_deleted.append(expected_finf_path)
for file in to_be_deleted:
del files[file]
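A toy example of the pruning performed by file_filter (filenames are invented; the tuples stand in for the [FileType, name] pairs produced by file_classification):

    data_fork = os.path.join("game", "launcher")
    rsrc_fork = data_fork + ".rsrc"
    files = {
        data_fork: (FileType.NON_MAC, "launcher"),
        rsrc_fork: (FileType.APPLE_DOUBLE_RSRC, "launcher"),
    }
    file_filter(files)
    # The bare data-fork entry is dropped, so it is not hashed again as a non-mac file.
    assert list(files) == [rsrc_fork]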
-def compute_hash_of_dirs(root_directory, depth, size=0, limit_timestamps_date=None, alg="md5"):
- """ Return dictionary containing checksums of all files in directory """
+
+def compute_hash_of_dirs(
+ root_directory, depth, size=0, limit_timestamps_date=None, alg="md5"
+):
+ """Return dictionary containing checksums of all files in directory"""
res = []
for directory in get_dirs_at_depth(root_directory, depth):
@@ -593,11 +674,13 @@ def compute_hash_of_dirs(root_directory, depth, size=0, limit_timestamps_date=No
relative_dir = os.path.dirname(relative_path)
relative_path = os.path.join(relative_dir, base_name)
- if (file_info[0] == FileType.APPLE_DOUBLE_MACOSX):
+ if file_info[0] == FileType.APPLE_DOUBLE_MACOSX:
relative_dir = os.path.dirname(os.path.dirname(relative_path))
- relative_path = os.path.join(relative_dir, base_name)
+ relative_path = os.path.join(relative_dir, base_name)
- hash_of_dir[relative_path] = file_checksum(file_path, alg, size, file_info) + (filtered_file_map[file_path],)
+ hash_of_dir[relative_path] = file_checksum(
+ file_path, alg, size, file_info
+ ) + (filtered_file_map[file_path],)
res.append(hash_of_dir)
return res
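The shape of the value returned by compute_hash_of_dirs, as inferred from how create_dat_file unpacks it further below: one dict per game directory, keyed by relative path, with a (hashes, size, size-r, size-rd, modification-time) tuple per file. The values here are placeholders.

    # Illustrative structure only; checksum keys, digests and sizes are placeholders.
    example_result = [
        {
            "DATA/GAME.001": (
                [("md5", "0123456789abcdef0123456789abcdef")],
                123456,        # size
                0,             # size-r (resource fork)
                123456,        # size-rd (data fork)
                "2024-05-01",  # modification time
            ),
        },
    ]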
@@ -611,35 +694,35 @@ def extract_macbin_mtime(file_byte_stream):
"""
macbin_epoch = datetime(1904, 1, 1)
header = file_byte_stream[:128]
- macbin_seconds = read_be_32(header[0x5f:])
+ macbin_seconds = read_be_32(header[0x5F:])
return (macbin_epoch + timedelta(seconds=macbin_seconds)).date()
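The epoch arithmetic in extract_macbin_mtime can be verified with a synthetic header, using struct in place of the script's read_be_32 helper; the timestamp is made up.

    import struct
    from datetime import datetime, timedelta

    header = bytearray(128)
    # Seconds from 1904-01-01 to 2000-01-01, stored big-endian at offset 0x5F.
    secs = int((datetime(2000, 1, 1) - datetime(1904, 1, 1)).total_seconds())
    header[0x5F : 0x5F + 4] = struct.pack(">I", secs)
    mtime = (datetime(1904, 1, 1) + timedelta(seconds=struct.unpack(">I", header[0x5F : 0x5F + 4])[0])).date()
    assert str(mtime) == "2000-01-01"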
def extract_mtime_appledouble(file_byte_stream):
"""
- Returns modification time of appledouble file.
- Doc 1 - The File Dates Info entry (ID=8) consists of the file creation, modification, backup
- and access times (see Figure 2-1), stored as a signed number of seconds before
- or after 12:00 a.m. (midnight), January 1, 2000 Greenwich Mean Time (GMT)
-
- Doc 2 -
- struct ASFileDates /* entry ID 8, file dates info */
- {
- sint32 create; /* file creation date/time */
- sint32 modify; /* last modification date/time */
- sint32 backup; /* last backup date/time */
- sint32 access; /* last access date/time */
- }; /* ASFileDates */
+ Returns modification time of appledouble file.
+ Doc 1 - The File Dates Info entry (ID=8) consists of the file creation, modification, backup
+ and access times (see Figure 2-1), stored as a signed number of seconds before
+ or after 12:00 a.m. (midnight), January 1, 2000 Greenwich Mean Time (GMT)
+
+ Doc 2 -
+ struct ASFileDates /* entry ID 8, file dates info */
+ {
+ sint32 create; /* file creation date/time */
+ sint32 modify; /* last modification date/time */
+ sint32 backup; /* last backup date/time */
+ sint32 access; /* last access date/time */
+ }; /* ASFileDates */
"""
entry_count = read_be_16(file_byte_stream[24:])
for entry in range(entry_count):
- start_index = 26 + entry*12
+ start_index = 26 + entry * 12
id = read_be_32(file_byte_stream[start_index:])
- offset = read_be_32(file_byte_stream[start_index+4:])
- length = read_be_32(file_byte_stream[start_index+8:])
+ offset = read_be_32(file_byte_stream[start_index + 4 :])
+ length = read_be_32(file_byte_stream[start_index + 8 :])
if id == 8:
- date_info_data = file_byte_stream[offset:offset + length]
+ date_info_data = file_byte_stream[offset : offset + length]
if len(date_info_data) < 16:
raise ValueError("FileDatesInfo block is too short.")
appledouble_epoch = datetime(2000, 1, 1)
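A minimal sketch of the date conversion described in the docstring above: the modify field of entry ID 8 is a signed, big-endian number of seconds relative to 2000-01-01 GMT. The offset here is invented.

    import struct
    from datetime import datetime, timedelta

    appledouble_epoch = datetime(2000, 1, 1)
    raw = struct.pack(">i", -86400)      # one day before the epoch, signed big-endian
    modify_secs = struct.unpack(">i", raw)[0]
    print((appledouble_epoch + timedelta(seconds=modify_secs)).date())  # 1999-12-31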
@@ -661,7 +744,11 @@ def macfile_timestamp(filepath):
return extract_macbin_mtime(data)
# Appledouble
- if is_appledouble_rsrc(filepath) or is_appledouble_in_dot_(filepath) or is_appledouble_in_macosx(filepath):
+ if (
+ is_appledouble_rsrc(filepath)
+ or is_appledouble_in_dot_(filepath)
+ or is_appledouble_in_macosx(filepath)
+ ):
return extract_mtime_appledouble(data)
return None
@@ -698,7 +785,9 @@ def filter_files_by_timestamp(files, limit_timestamps_date):
mtime = macfile_timestamp(filepath)
if mtime is None:
mtime = datetime.fromtimestamp(os.path.getmtime(filepath)).date()
- if limit_timestamps_date is None or (limit_timestamps_date is not None and (mtime <= user_date or mtime == today)):
+ if limit_timestamps_date is None or (
+ limit_timestamps_date is not None and (mtime <= user_date or mtime == today)
+ ):
filtered_file_map[filepath] = str(mtime)
return filtered_file_map
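A compact restatement of the predicate used above, assuming user_date and today are computed as in the surrounding function; files modified today are deliberately kept even when they are newer than the cutoff.

    def keep_file(filepath, user_date, today, cutoff_enabled):
        # Sketch only; mirrors the condition in filter_files_by_timestamp.
        mtime = macfile_timestamp(filepath)
        if mtime is None:
            mtime = datetime.fromtimestamp(os.path.getmtime(filepath)).date()
        return (not cutoff_enabled) or mtime <= user_date or mtime == today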
@@ -707,17 +796,25 @@ def filter_files_by_timestamp(files, limit_timestamps_date):
def create_dat_file(hash_of_dirs, path, checksum_size=0):
with open(f"{os.path.basename(path)}.dat", "w") as file:
# Header
- file.writelines([
- "scummvm (\n",
- f"\tauthor scan\n",
- f"\tversion {script_version}\n",
- ")\n\n"
- ])
+ file.writelines(
+ [
+ "scummvm (\n",
+ "\tauthor scan\n",
+ f"\tversion {script_version}\n",
+ ")\n\n",
+ ]
+ )
# Game files
for hash_of_dir in hash_of_dirs:
file.write("game (\n")
- for filename, (hashes, size, size_r, size_rd, timestamp) in hash_of_dir.items():
+ for filename, (
+ hashes,
+ size,
+ size_r,
+ size_rd,
+ timestamp,
+ ) in hash_of_dir.items():
filename = encode_path_components(filename)
data = f"name '{filename}' size {size} size-r {size_r} size-rd {size_rd} modification-time {timestamp}"
for key, value in hashes:
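For reference, the .dat text emitted by the writer above looks roughly like this (digests, sizes and the version string are placeholders; the checksum key names depend on the configured size variants):

    scummvm (
        author scan
        version <script_version>
    )

    game (
        name 'DATA/GAME.001' size 123456 size-r 0 size-rd 123456 modification-time 2024-05-01 md5 0123456789abcdef0123456789abcdef
    )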
@@ -729,25 +826,27 @@ def create_dat_file(hash_of_dirs, path, checksum_size=0):
class MyParser(argparse.ArgumentParser):
def error(self, message):
- sys.stderr.write('Error: %s\n' % message)
+ sys.stderr.write("Error: %s\n" % message)
self.print_help()
sys.exit(2)
parser = argparse.ArgumentParser()
-parser.add_argument("--directory",
- help="Path of directory with game files")
-parser.add_argument("--depth",
- help="Depth from root to game directories")
-parser.add_argument("--size",
- help="Use first n bytes of file to calculate checksum")
-parser.add_argument("--limit-timestamps",
- help="Format - YYYY-MM-DD or YYYY-MM or YYYY. Filters out the files those were modified after the given timestamp. Note that if the modification time is today, it would not be filtered out.")
+parser.add_argument("--directory", help="Path of directory with game files")
+parser.add_argument("--depth", help="Depth from root to game directories")
+parser.add_argument("--size", help="Use first n bytes of file to calculate checksum")
+parser.add_argument(
+ "--limit-timestamps",
+ help="Format - YYYY-MM-DD or YYYY-MM or YYYY. Filters out the files those were modified after the given timestamp. Note that if the modification time is today, it would not be filtered out.",
+)
args = parser.parse_args()
path = os.path.abspath(args.directory) if args.directory else os.getcwd()
depth = int(args.depth) if args.depth else 0
checksum_size = int(args.size) if args.size else 0
limit_timestamp_date = str(args.limit_timestamps) if args.limit_timestamps else None
-create_dat_file(compute_hash_of_dirs(
- path, depth, checksum_size, limit_timestamp_date), path, checksum_size)
+create_dat_file(
+ compute_hash_of_dirs(path, depth, checksum_size, limit_timestamp_date),
+ path,
+ checksum_size,
+)
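A hypothetical invocation tying the arguments together (paths and dates are examples only); per create_dat_file above, the output lands in a .dat file named after the scanned directory's basename, written to the current working directory:

    python compute_hash.py --directory /data/games --depth 2 --size 5000 --limit-timestamps 2024-05

With these arguments the result would be written to games.dat.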