[Scummvm-git-logs] scummvm master -> be55ccf1a31a18ec96e9b1fee5ecaf501ed5b2d8

lephilousophe noreply at scummvm.org
Fri Nov 15 08:37:24 UTC 2024


This automated email contains information about 8 new commits which have been
pushed to the 'scummvm' repo located at https://github.com/scummvm/scummvm .

Summary:
890b0c49bf COMMON: Make punycode_needEncode coherent with punycode_encode
93c68d38d4 COMMON: Allow to determine if a path needs punyencoding
0200a3d562 COMMON: Avoid punycoding of Win32 paths
29f701d8f6 BASE: Fix soundfont command line switch parsing
ac4b547fe9 WIN32: Make DLC Path object use the proper separator
6678e1b5b1 COMMON: Avoid encoding paths if possible on all drive based platforms
234f3eb7f9 COMMON: Don't puny encode paths using UTF-8 but Latin-1
be55ccf1a3 COMMON: Remove two outdated function prototypes from Path


Commit: 890b0c49bf67a4d901b704fffb07f0c402f6b101
    https://github.com/scummvm/scummvm/commit/890b0c49bf67a4d901b704fffb07f0c402f6b101
Author: Le Philousophe (lephilousophe at users.noreply.github.com)
Date: 2024-11-15T09:37:18+01:00

Commit Message:
COMMON: Make punycode_needEncode coherent with punycode_encode

This is a follow-up of b930232e6dfa93c1fc0bdc282b2154078918c311

Changed paths:
    common/punycode.cpp


diff --git a/common/punycode.cpp b/common/punycode.cpp
index 9791858e802..9b32ec57c8a 100644
--- a/common/punycode.cpp
+++ b/common/punycode.cpp
@@ -210,6 +210,11 @@ bool punycode_needEncode(const String &src) {
 	if (!src.size())
 		return false;
 
+	// If name begins with xn-- this could become ambiguous
+	if (src.size() > 4 && src[0] == 'x' && src[1] == 'n' &&
+		src[2] == '-' && src[3] == '-')
+		return true;
+
 	for (uint si = 0; si < src.size(); si++) {
 		if (src[si] & 0x80 || src[si] < 0x20 || strchr(SPECIAL_SYMBOLS, src[si])) {
 			return true;


Commit: 93c68d38d4dd35eb0fdfc0d1c842912ffdae8b71
    https://github.com/scummvm/scummvm/commit/93c68d38d4dd35eb0fdfc0d1c842912ffdae8b71
Author: Le Philousophe (lephilousophe at users.noreply.github.com)
Date: 2024-11-15T09:37:18+01:00

Commit Message:
COMMON: Allow to determine if a path needs punyencoding

Changed paths:
    common/path.cpp
    common/path.h


diff --git a/common/path.cpp b/common/path.cpp
index 1d289ac3020..a2908f1a76b 100644
--- a/common/path.cpp
+++ b/common/path.cpp
@@ -1025,6 +1025,18 @@ Path Path::punycodeEncode() const {
 		}, tmp);
 }
 
+bool Path::punycodeNeedsEncode() const {
+	bool tmp;
+	return reduceComponents<bool &>(
+		[](bool &result, const String &in, bool last) -> bool & {
+			// If we already need encode, we still need it
+			if (result) return result;
+
+			result = punycode_needEncode(in);
+			return result;
+		}, tmp);
+}
+
 // For a path component creates a string with following property:
 // if 2 files have the same case-insensitive
 // identifier string then and only then we treat them as
diff --git a/common/path.h b/common/path.h
index 0da427e8c4e..bb90ad38cb5 100644
--- a/common/path.h
+++ b/common/path.h
@@ -498,6 +498,11 @@ public:
 	 */
 	Path punycodeEncode() const;
 
+	/**
+	 * Returns whether the path will need to be Punycoded
+	 */
+	bool punycodeNeedsEncode() const;
+
 	/**
 	 * Convert all characters in the path to lowercase.
 	 *


Commit: 0200a3d562e0c89de9a29d2fdd682ac4b6fb8b07
    https://github.com/scummvm/scummvm/commit/0200a3d562e0c89de9a29d2fdd682ac4b6fb8b07
Author: Le Philousophe (lephilousophe at users.noreply.github.com)
Date: 2024-11-15T09:37:18+01:00

Commit Message:
COMMON: Avoid punycoding of Win32 paths

Win32 paths contain a colon which triggers punycoding.
For Win32 UNICODE, paths are encoded in UTF-8, so there is no real
need to puny-encode them anyway.
Only detect flagrant forbidden cases and store them the traditional way.

Without UNICODE support, the paths are provided in the system encoding.
Storing them as-is would make the configuration file mix encodings.
For such paths, don't punyencode if the part after the drive specifier
doesn't need it.

Changed paths:
    common/path.cpp
    common/path.h


diff --git a/common/path.cpp b/common/path.cpp
index a2908f1a76b..64745b0f8ce 100644
--- a/common/path.cpp
+++ b/common/path.cpp
@@ -1152,6 +1152,54 @@ Path Path::fromConfig(const String &value) {
 	return Path(value, '/').punycodeDecode();
 }
 
+String Path::toConfig() const {
+#if defined(WIN32)
+	if (!isEscaped()) {
+		// If we are escaped, we have forbidden characters (slash or pipe) which must be encoded
+		// This can't happen on real Win32 paths
+#if defined(UNICODE)
+		// With UNICODE every path is encoded by the backend to UTF-8 strings all the configuration
+		// Except for (strange) cases where we would like to store paths containing backslashes,
+		// there is no need for escaping
+		if (strchr(_str.c_str(), Path::kNativeSeparator) == nullptr) {
+			return toString(Path::kNativeSeparator);
+		}
+#else
+		// Under WIN32 we try to store paths without punycoding the ':' for drives
+		// and using \ to keep the file simple.
+		// This also allows the configuration file to be backwards compatible for simple cases.
+		// Having non-ASCII characters in the path makes it punycoded anyway.
+		const char *start = nullptr;
+
+		// Check for DOS, Win32 device namespace and Win32 file name namespace style paths
+		// The checks are done from less costly to more
+		if (_str.size() >= 2 && _str[1] == ':' && Common::isAlpha(_str[0])) {
+			// This looks like a DOS drive specifier,
+			start = _str.c_str() + 2;
+		} else if (_str.size() >= 4 &&
+			_str[0] == SEPARATOR &&
+			_str[1] == SEPARATOR &&
+			_str[3] == SEPARATOR && (
+				_str[2] == '?' ||
+				_str[2] == '.')) {
+			// This looks like a Win32 device or drive namespaces specifier...
+			start = _str.c_str() + 4;
+			if (_str.size() >= 6 && _str[5] == ':' && Common::isAlpha(_str[4])) {
+				// ...which contains a drive specifier
+				start += 2;
+			}
+		}
+		// with some luck we don't need to punycode the path
+		if (start && !extract(start).punycodeNeedsEncode()) {
+			return toString(Path::kNativeSeparator);
+		}
+#endif
+	}
+#endif
+
+	return punycodeEncode().toString('/');
+}
+
 Path Path::fromCommandLine(const String &value) {
 	if (value.empty()) {
 		return Path();
diff --git a/common/path.h b/common/path.h
index bb90ad38cb5..c435ed3e3e2 100644
--- a/common/path.h
+++ b/common/path.h
@@ -561,10 +561,10 @@ public:
 	/**
 	 * Use by ConfigManager to store a path in a protected fashion
 	 * All components are punyencoded and / is used as a delimiter for all platforms
+	 * Under Windows don't encode when it's not needed and make use of \ separator
+	 * in this case
 	 */
-	String toConfig() const {
-		return punycodeEncode().toString('/');
-	}
+	String toConfig() const;
 
 	/**
 	 * Used by ConfigManager to parse a configuration value in a backwards compatible way


Commit: 29f701d8f63779614f78279d05f4b0f717368f7d
    https://github.com/scummvm/scummvm/commit/29f701d8f63779614f78279d05f4b0f717368f7d
Author: Le Philousophe (lephilousophe at users.noreply.github.com)
Date: 2024-11-15T09:37:18+01:00

Commit Message:
BASE: Fix soundfont command line switch parsing

This is a leftover from c1b4dfad77113012dfea4e9688613c75012e3312

Changed paths:
    base/commandLine.cpp


diff --git a/base/commandLine.cpp b/base/commandLine.cpp
index 554f3c03316..234ab0b51fb 100644
--- a/base/commandLine.cpp
+++ b/base/commandLine.cpp
@@ -856,12 +856,13 @@ Common::String parseCommandLine(Common::StringMap &settings, int argc, const cha
 			END_OPTION
 
 			DO_LONG_OPTION("soundfont")
-				Common::FSNode path(Common::Path::fromConfig(option));
+				Common::FSNode path(Common::Path::fromCommandLine(option));
 				if (!path.exists()) {
 					usage("Non-existent soundfont path '%s'", option);
 				} else if (!path.isReadable()) {
 					usage("Non-readable soundfont path '%s'", option);
 				}
+				settings["soundfont"] = path.getPath().toConfig();
 			END_OPTION
 
 #ifdef SDL_BACKEND


Commit: ac4b547fe9fd7b66f9ab109ebc266d1291bec9c1
    https://github.com/scummvm/scummvm/commit/ac4b547fe9fd7b66f9ab109ebc266d1291bec9c1
Author: Le Philousophe (lephilousophe at users.noreply.github.com)
Date: 2024-11-15T09:37:18+01:00

Commit Message:
WIN32: Make DLC Path object use the proper separator

Changed paths:
    backends/platform/sdl/win32/win32.cpp


diff --git a/backends/platform/sdl/win32/win32.cpp b/backends/platform/sdl/win32/win32.cpp
index 607d015f6aa..695653394fa 100644
--- a/backends/platform/sdl/win32/win32.cpp
+++ b/backends/platform/sdl/win32/win32.cpp
@@ -289,7 +289,7 @@ Common::Path OSystem_Win32::getDefaultDLCsPath() {
 		CreateDirectory(dlcsPath, nullptr);
 	}
 
-	return Common::Path(Win32::tcharToString(dlcsPath));
+	return Common::Path(Win32::tcharToString(dlcsPath), Common::Path::kNativeSeparator);
 }
 
 Common::Path OSystem_Win32::getScreenshotsPath() {


Commit: 6678e1b5b128b4c3db3c5160bda9ac1f9c950c64
    https://github.com/scummvm/scummvm/commit/6678e1b5b128b4c3db3c5160bda9ac1f9c950c64
Author: Le Philousophe (lephilousophe at users.noreply.github.com)
Date: 2024-11-15T09:37:18+01:00

Commit Message:
COMMON: Avoid encoding paths if possible on all drive based platforms

Changed paths:
    common/path.cpp


diff --git a/common/path.cpp b/common/path.cpp
index 64745b0f8ce..1493c3bcb51 100644
--- a/common/path.cpp
+++ b/common/path.cpp
@@ -1153,47 +1153,31 @@ Path Path::fromConfig(const String &value) {
 }
 
 String Path::toConfig() const {
-#if defined(WIN32)
+#if defined(WIN32) && defined(UNICODE)
 	if (!isEscaped()) {
 		// If we are escaped, we have forbidden characters (slash or pipe) which must be encoded
 		// This can't happen on real Win32 paths
-#if defined(UNICODE)
 		// With UNICODE every path is encoded by the backend to UTF-8 strings all the configuration
 		// Except for (strange) cases where we would like to store paths containing backslashes,
 		// there is no need for escaping
 		if (strchr(_str.c_str(), Path::kNativeSeparator) == nullptr) {
 			return toString(Path::kNativeSeparator);
 		}
-#else
-		// Under WIN32 we try to store paths without punycoding the ':' for drives
-		// and using \ to keep the file simple.
-		// This also allows the configuration file to be backwards compatible for simple cases.
-		// Having non-ASCII characters in the path makes it punycoded anyway.
-		const char *start = nullptr;
-
-		// Check for DOS, Win32 device namespace and Win32 file name namespace style paths
-		// The checks are done from less costly to more
-		if (_str.size() >= 2 && _str[1] == ':' && Common::isAlpha(_str[0])) {
-			// This looks like a DOS drive specifier,
-			start = _str.c_str() + 2;
-		} else if (_str.size() >= 4 &&
-			_str[0] == SEPARATOR &&
-			_str[1] == SEPARATOR &&
-			_str[3] == SEPARATOR && (
-				_str[2] == '?' ||
-				_str[2] == '.')) {
-			// This looks like a Win32 device or drive namespaces specifier...
-			start = _str.c_str() + 4;
-			if (_str.size() >= 6 && _str[5] == ':' && Common::isAlpha(_str[4])) {
-				// ...which contains a drive specifier
-				start += 2;
-			}
-		}
-		// with some luck we don't need to punycode the path
-		if (start && !extract(start).punycodeNeedsEncode()) {
+	}
+#elif defined(__3DS__) || defined(__amigaos4__) || defined(__DS__) || defined(__MORPHOS__) || defined(NINTENDO_SWITCH) || defined(__PSP__) || defined(PSP2) || defined(__WII__) || defined(WIN32)
+	// For all platforms making use of : as a drive separator, avoid useless punycoding
+	if (!isEscaped()) {
+		// If we are escaped, we have forbidden characters which must be encoded
+		// Try to replace all : by SEPARATOR and check if we need puny encoding: if we don't, we are safe
+		Path tmp(*this);
+		tmp._str.replace(':', SEPARATOR);
+#if defined(WIN32)
+		// WIN32 can also make use of ? in Win32 devices namespace
+		tmp._str.replace('?', SEPARATOR);
+#endif
+		if (!tmp.punycodeNeedsEncode()) {
 			return toString(Path::kNativeSeparator);
 		}
-#endif
 	}
 #endif
 


Commit: 234f3eb7f9c7aa03f20b0c1d78d754983dc7ba03
    https://github.com/scummvm/scummvm/commit/234f3eb7f9c7aa03f20b0c1d78d754983dc7ba03
Author: Le Philousophe (lephilousophe at users.noreply.github.com)
Date: 2024-11-15T09:37:18+01:00

Commit Message:
COMMON: Don't puny encode paths using UTF-8 but Latin-1

This avoids choking on invalid codepoints under Windows without
Unicode.
Latin-1 encoding has an identity mapping: every byte in the String will
get translated as-is in the U32String.

Don't modify punycodeDecode and punycodeEncode, as the official
punyencoder uses Unicode code points.

Changed paths:
    common/path.cpp


diff --git a/common/path.cpp b/common/path.cpp
index 1493c3bcb51..3acdfaf2147 100644
--- a/common/path.cpp
+++ b/common/path.cpp
@@ -1149,7 +1149,20 @@ Path Path::fromConfig(const String &value) {
 #endif
 
 	// If the path is not punyencoded this will be a no-op
-	return Path(value, '/').punycodeDecode();
+	Path tmp;
+	return Path(value, '/').reduceComponents<Path &>(
+		[](Path &path, const String &in, bool last) -> Path & {
+			// We encode the result as Latin-1: every byte has its own value
+			// This avoids error for non UTF-8 paths
+			String out = punycode_hasprefix(in) ?
+				     punycode_decodefilename(in).encode(kISO8859_1) :
+				     in;
+			path.appendInPlace(out, kNoSeparator);
+			if (!last) {
+				path._str += SEPARATOR;
+			}
+			return path;
+		}, tmp);
 }
 
 String Path::toConfig() const {
@@ -1181,7 +1194,19 @@ String Path::toConfig() const {
 	}
 #endif
 
-	return punycodeEncode().toString('/');
+	String tmp;
+	return reduceComponents<String &>(
+		[](String &path, const String &in, bool last) -> String & {
+			// We decode the result as Latin-1: every byte has its own value
+			// This avoids error for non UTF-8 paths
+			Common::String out = punycode_encodefilename(in.decode(kISO8859_1));
+			path += out;
+			if (!last) {
+				path += '/';
+			}
+			return path;
+		}, tmp);
+	return tmp;
 }
 
 Path Path::fromCommandLine(const String &value) {


Commit: be55ccf1a31a18ec96e9b1fee5ecaf501ed5b2d8
    https://github.com/scummvm/scummvm/commit/be55ccf1a31a18ec96e9b1fee5ecaf501ed5b2d8
Author: Le Philousophe (lephilousophe at users.noreply.github.com)
Date: 2024-11-15T09:37:18+01:00

Commit Message:
COMMON: Remove two outdated function prototypes from Path

These functions don't exist at all.

Changed paths:
    common/path.h


diff --git a/common/path.h b/common/path.h
index c435ed3e3e2..063ac58548e 100644
--- a/common/path.h
+++ b/common/path.h
@@ -164,9 +164,6 @@ private:
 	 */
 	bool compareComponents(bool (*comparator)(const String &x, const String &y), const Path &other) const;
 
-	static Path &punycode_decodefilename_helper(Path &path, const String &in, bool last);
-	static Path &punycode_encodefilename_helper(Path &path, const String &in, bool last);
-
 	/**
 	 * Determines if the path is escaped
 	 */




More information about the Scummvm-git-logs mailing list