[Scummvm-git-logs] scummvm master -> 2cad62a6ec818c6bb79410eabfce04f60f2301e3

sev- noreply at scummvm.org
Sat Jul 2 10:53:52 UTC 2022


This automated email contains information about 1 new commit which has been
pushed to the 'scummvm' repo located at https://github.com/scummvm/scummvm .

Summary:
2cad62a6ec COMMON: Add string encoding API with more detailed error behavior.


Commit: 2cad62a6ec818c6bb79410eabfce04f60f2301e3
    https://github.com/scummvm/scummvm/commit/2cad62a6ec818c6bb79410eabfce04f60f2301e3
Author: elasota (ejlasota at gmail.com)
Date: 2022-07-02T12:53:50+02:00

Commit Message:
COMMON: Add string encoding API with more detailed error behavior.

Changed paths:
    common/str-enc.cpp
    common/str-enc.h
    common/str.h
    common/ustr.h


diff --git a/common/str-enc.cpp b/common/str-enc.cpp
index 5ff9bb8f8a9..cd0319597ea 100644
--- a/common/str-enc.cpp
+++ b/common/str-enc.cpp
@@ -388,7 +388,9 @@ void U32String::decodeJohab(const char *src, uint32 len) {
 }
 
 
-void String::encodeWindows932(const U32String &src) {
+StringEncodingResult String::encodeWindows932(const U32String &src, char errorChar) {
+	StringEncodingResult encodingResult = kStringEncodingResultSucceeded;
+
 	ensureCapacity(src.size() * 2, false);
 
 	if (!cjk_tables_loaded)
@@ -432,12 +434,14 @@ void String::encodeWindows932(const U32String &src) {
 		}
 
 		if (point > 0x10000) {
-			operator+=('?');
+			operator+=(errorChar);
+			encodingResult = kStringEncodingResultHasErrors;
 			continue;
 		}
 
 		if (!windows932ReverseConversionTable) {
-			operator+=('?');
+			operator+=(errorChar);
+			encodingResult = kStringEncodingResultHasErrors;
 			continue;
 		}
 
@@ -450,12 +454,17 @@ void String::encodeWindows932(const U32String &src) {
 
 		// This codepage contains cyrillic, so no need to transliterate
 
-		operator+=('?');
+		operator+=(errorChar);
+		encodingResult = kStringEncodingResultHasErrors;
 		continue;
 	}
+
+	return encodingResult;
 }
 
-void String::encodeWindows949(const U32String &src) {
+StringEncodingResult String::encodeWindows949(const U32String &src, char errorChar) {
+	StringEncodingResult encodingResult = kStringEncodingResultSucceeded;
+
 	ensureCapacity(src.size() * 2, false);
 
 	if (!cjk_tables_loaded)
@@ -493,20 +502,24 @@ void String::encodeWindows949(const U32String &src) {
 		}
 
 		if (point > 0x10000 || !windows949ReverseConversionTable) {
-			operator+=('?');
+			operator+=(errorChar);
+			encodingResult = kStringEncodingResultHasErrors;
 			continue;
 		}
 
 		uint16 rev = windows949ReverseConversionTable[point];
 		if (rev == 0) {
 			// This codepage contains cyrillic, so no need to transliterate
-			operator+=('?');
+			operator+=(errorChar);
+			encodingResult = kStringEncodingResultHasErrors;
 			continue;
 		}
 
 		operator+=(rev >> 8);
 		operator+=(rev & 0xff);
 	}
+
+	return encodingResult;
 }
 
 static const char g_cyrillicTransliterationTable[] = {
@@ -518,31 +531,34 @@ static const char g_cyrillicTransliterationTable[] = {
 	'e', 'e', 'd', 'g', 'e', 'z', 'i', 'i', 'j', 'l', 'n', 'c', 'k', 'i', 'u', 'd',
 };
 
-void String::translitChar(U32String::value_type point) {
+StringEncodingResult String::translitChar(U32String::value_type point, char errorChar) {
 	if (point == 0xa0) {
 		operator+=(' ');
-		return;
+		return kStringEncodingResultSucceeded;
 	}
 
 	if (point == 0xad) {
 		operator+=('-');
-		return;
+		return kStringEncodingResultSucceeded;
 	}
 
 	if (point == 0x2116) {
 		operator+=('N');
-		return;
+		return kStringEncodingResultSucceeded;
 	}
 
 	if (point >= 0x401 && point <= 0x45f) {
 		operator+=(g_cyrillicTransliterationTable[point - 0x400]);
-		return;
+		return kStringEncodingResultSucceeded;
 	}
 
-	operator+=('?');
+	operator+=(errorChar);
+	return kStringEncodingResultHasErrors;
 }
 
-void String::encodeWindows950(const U32String &src, bool transliterate) {
+StringEncodingResult String::encodeWindows950(const U32String &src, bool transliterate, char errorChar) {
+	StringEncodingResult encodingResult = kStringEncodingResultSucceeded;
+
 	ensureCapacity(src.size() * 2, false);
 
 	if (!cjk_tables_loaded)
@@ -578,7 +594,8 @@ void String::encodeWindows950(const U32String &src, bool transliterate) {
 		}
 
 		if (point > 0x10000) {
-			operator+=('?');
+			operator+=(errorChar);
+			encodingResult = kStringEncodingResultHasErrors;
 			continue;
 		}
 
@@ -589,7 +606,8 @@ void String::encodeWindows950(const U32String &src, bool transliterate) {
 		}
 
 		if (!windows950ReverseConversionTable) {
-			operator+=('?');
+			operator+=(errorChar);
+			encodingResult = kStringEncodingResultHasErrors;
 			continue;
 		}
 
@@ -629,16 +647,23 @@ void String::encodeWindows950(const U32String &src, bool transliterate) {
 		}
 
 		if (transliterate) {
-			translitChar(point);
+			StringEncodingResult translitResult = translitChar(point, errorChar);
+			if (translitResult != kStringEncodingResultSucceeded)
+				encodingResult = translitResult;
 			continue;
 		}
 
-		operator+=('?');
+		operator+=(errorChar);
+		encodingResult = kStringEncodingResultHasErrors;
 		continue;
 	}
+
+	return encodingResult;
 }
 
-void String::encodeJohab(const U32String &src) {
+StringEncodingResult String::encodeJohab(const U32String &src, char errorChar) {
+	StringEncodingResult encodingResult = kStringEncodingResultSucceeded;
+
 	ensureCapacity(src.size() * 2, false);
 
 	if (!cjk_tables_loaded)
@@ -671,19 +696,23 @@ void String::encodeJohab(const U32String &src) {
 		}
 
 		if (point > 0x10000 || !johabReverseConversionTable) {
-			operator+=('?');
+			operator+=(errorChar);
+			encodingResult = kStringEncodingResultHasErrors;
 			continue;
 		}
 
 		uint16 rev = johabReverseConversionTable[point];
 		if (rev == 0) {
-			operator+=('?');
+			operator+=(errorChar);
+			encodingResult = kStringEncodingResultHasErrors;
 			continue;
 		}
 
 		operator+=(rev >> 8);
 		operator+=(rev & 0xff);
 	}
+
+	return encodingResult;
 }
 
 // //TODO: This is a quick and dirty converter. Refactoring needed:
@@ -693,7 +722,7 @@ void String::encodeJohab(const U32String &src) {
 //    character does not fit in 4 bytes & does not inform caller on any errors
 //
 // More comprehensive one lives in wintermute/utils/convert_utf.cpp
-void String::encodeUTF8(const U32String &src) {
+StringEncodingResult String::encodeUTF8(const U32String &src, char errorChar) {
 	ensureCapacity(src.size(), false);
 	static const uint8 firstByteMark[5] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0 };
 	char writingBytes[5] = {0x00, 0x00, 0x00, 0x00, 0x00};
@@ -742,6 +771,8 @@ void String::encodeUTF8(const U32String &src) {
 
 		operator+=(pBytes);
 	}
+
+	return kStringEncodingResultSucceeded;
 }
 
 #define decodeUTF16Template(suffix, read)				\
@@ -916,7 +947,9 @@ void U32String::decodeOneByte(const char *src, uint32 len, CodePage page) {
 	}
 }
 
-void String::encodeOneByte(const U32String &src, CodePage page, bool transliterate) {
+StringEncodingResult String::encodeOneByte(const U32String &src, CodePage page, bool transliterate, char errorChar) {
+	StringEncodingResult encodingResult = kStringEncodingResultSucceeded;
+
 	const ReverseTablePrefixTreeLevel1 *conversionTable =
 		getReverseConversionTable(page);
 
@@ -931,11 +964,15 @@ void String::encodeOneByte(const U32String &src, CodePage page, bool translitera
 			}
 
 			if (transliterate) {
-				translitChar(c);
-			} else
-				operator+=('?');
+				StringEncodingResult translitResult = translitChar(c, errorChar);
+				if (translitResult != kStringEncodingResultSucceeded)
+					encodingResult = translitResult;
+			} else {
+				operator+=(errorChar);
+				encodingResult = kStringEncodingResultHasErrors;
+			}
 		}
-		return;
+		return encodingResult;
 	}
 
 	for (uint i = 0; i < src.size(); ++i) {
@@ -955,32 +992,32 @@ void String::encodeOneByte(const U32String &src, CodePage page, bool translitera
 		}
 
 		if (transliterate) {
-			translitChar(c);
-		} else
-			operator+=('?');
+			StringEncodingResult translitResult = translitChar(c, errorChar);
+			if (translitResult != kStringEncodingResultSucceeded)
+				encodingResult = translitResult;
+		} else {
+			operator+=(errorChar);
+			encodingResult = kStringEncodingResultHasErrors;
+		}
 	}
+
+	return encodingResult;
 }
 
-void String::encodeInternal(const U32String &src, CodePage page) {
+StringEncodingResult String::encodeInternal(const U32String &src, CodePage page, char errorChar) {
 	switch(page) {
 	case kUtf8:
-		encodeUTF8(src);
-		break;
+		return encodeUTF8(src, errorChar);
 	case kWindows932:
-		encodeWindows932(src);
-		break;
+		return encodeWindows932(src, errorChar);
 	case kWindows949:
-		encodeWindows949(src);
-		break;
+		return encodeWindows949(src, errorChar);
 	case kWindows950:
-		encodeWindows950(src);
-		break;
+		return encodeWindows950(src, true, errorChar);
 	case kJohab:
-		encodeJohab(src);
-		break;
+		return encodeJohab(src, errorChar);
 	default:
-		encodeOneByte(src, page);
-		break;
+		return encodeOneByte(src, page, true, errorChar);
 	}
 }
 
@@ -1040,14 +1077,18 @@ U32String String::decode(CodePage page) const {
 }
 
 String U32String::encode(CodePage page) const {
+	String string;
+	(void)encode(string, page, '?');
+	return string;
+}
+
+StringEncodingResult U32String::encode(String &outString, CodePage page, char errorChar) const {
 	if (page == kCodePageInvalid ||
 			page > kLastEncoding) {
 		error("Invalid codepage");
 	}
 
-	String string;
-	string.encodeInternal(*this, page);
-	return string;
+	return outString.encodeInternal(*this, page, errorChar);
 }
 
 } // End of namespace Common
diff --git a/common/str-enc.h b/common/str-enc.h
index ca866fe2b1e..ba3c122e4a5 100644
--- a/common/str-enc.h
+++ b/common/str-enc.h
@@ -57,6 +57,11 @@ enum CodePage {
 	kLastEncoding = kASCII
 };
 
+enum StringEncodingResult {
+	kStringEncodingResultSucceeded,
+	kStringEncodingResultHasErrors,
+};
+
 U32String convertUtf8ToUtf32(const String &str);
 String convertUtf32ToUtf8(const U32String &str);
 
diff --git a/common/str.h b/common/str.h
index e3b46991f37..6ad6c4513a4 100644
--- a/common/str.h
+++ b/common/str.h
@@ -246,14 +246,14 @@ public:
 	U32String decode(CodePage page = kUtf8) const;
 
 protected:
-	void encodeUTF8(const U32String &src);
-	void encodeWindows932(const U32String &src);
-	void encodeWindows949(const U32String &src);
-	void encodeWindows950(const U32String &src, bool translit = true);
-	void encodeJohab(const U32String &src);
-	void encodeOneByte(const U32String &src, CodePage page, bool translit = true);
-	void encodeInternal(const U32String &src, CodePage page);
-	void translitChar(U32String::value_type point);
+	StringEncodingResult encodeUTF8(const U32String &src, char errorChar);
+	StringEncodingResult encodeWindows932(const U32String &src, char errorChar);
+	StringEncodingResult encodeWindows949(const U32String &src, char errorChar);
+	StringEncodingResult encodeWindows950(const U32String &src, bool translit, char errorChar);
+	StringEncodingResult encodeJohab(const U32String &src, char errorChar);
+	StringEncodingResult encodeOneByte(const U32String &src, CodePage page, bool translit, char errorChar);
+	StringEncodingResult encodeInternal(const U32String &src, CodePage page, char errorChar);
+	StringEncodingResult translitChar(U32String::value_type point, char errorChar);
 
 	friend class U32String;
 };
diff --git a/common/ustr.h b/common/ustr.h
index 5e0523f4373..904a34cdcab 100644
--- a/common/ustr.h
+++ b/common/ustr.h
@@ -127,6 +127,10 @@ public:
 	/** Convert the string to the given @p page encoding and return the result as a new String. */
 	String encode(CodePage page = kUtf8) const;
 
+	/** Convert the string to the given @p page encoding and output in string @p outString,
+		replacing invalid characters with @p errorChar. */
+	StringEncodingResult encode(String &outString, CodePage page, char errorChar) const;
+
 	/**
 	 * Print formatted data into a U32String object.
 	 *




More information about the Scummvm-git-logs mailing list