[Scummvm-git-logs] scummvm master -> 33ede4811291cc866142ef807a579094a5062c94

Mon May 26 20:33:28 UTC 2025

This automated email contains information about 3 new commits which have been
pushed to the 'scummvm' repo located at https://api.github.com/repos/scummvm/scummvm .

Summary:
3f6370400f AGI: Use SeekableSubReadStream for WORDS from disk images
8d61af14ad AGI: Cleanup WORDS parser, dictionary structure
33ede48112 AGI: Allow words to start with digits in fan games


Commit: 3f6370400fc41175cb3fc45fc183f6394aef7ff4
    https://github.com/scummvm/scummvm/commit/3f6370400fc41175cb3fc45fc183f6394aef7ff4
Author: sluicebox (22204938+sluicebox at users.noreply.github.com)
Date: 2025-05-26T13:30:51-07:00

Commit Message:
AGI: Use SeekableSubReadStream for WORDS from disk images

Changed paths:
    engines/agi/loader_a2.cpp
    engines/agi/loader_v1.cpp

diff --git a/engines/agi/loader_a2.cpp b/engines/agi/loader_a2.cpp
index 180ba771b6d..83247befddb 100644
--- a/engines/agi/loader_a2.cpp
+++ b/engines/agi/loader_a2.cpp
@@ -27,6 +27,7 @@
 #include "common/formats/disk_image.h"
 #include "common/fs.h"
 #include "common/memstream.h"
+#include "common/substream.h"
 
 namespace Agi {
 
@@ -451,13 +452,11 @@ int AgiLoader_A2::loadWords() {
 		return errFilesNotFound;
 	}
 
-	// TODO: pass length and validate in parser
-	Common::SeekableReadStream &disk = *_disks[0];
-	disk.seek(_words.offset);
+	Common::SeekableSubReadStream words(_disks[0], _words.offset, _words.offset + _words.len);
 	if (_vm->getVersion() < 0x2000) {
-		return _vm->_words->loadDictionary_v1(disk);
+		return _vm->_words->loadDictionary_v1(words);
 	} else {
-		return _vm->_words->loadDictionary(disk);
+		return _vm->_words->loadDictionary(words);
 	}
 }
 
diff --git a/engines/agi/loader_v1.cpp b/engines/agi/loader_v1.cpp
index 3d860a2484d..e5a1edf7db2 100644
--- a/engines/agi/loader_v1.cpp
+++ b/engines/agi/loader_v1.cpp
@@ -25,6 +25,7 @@
 #include "agi/words.h"
 
 #include "common/fs.h"
+#include "common/substream.h"
 
 namespace Agi {
 
@@ -380,9 +381,8 @@ int AgiLoader_v1::loadWords() {
 		return errBadFileOpen;
 	}
 
-	// TODO: pass length and validate in parser
-	disk.seek(_words.offset);
-	return _vm->_words->loadDictionary_v1(disk);
+	Common::SeekableSubReadStream words(&disk, _words.offset, _words.offset + _words.len);
+	return _vm->_words->loadDictionary_v1(words);
 }
 
 } // End of namespace Agi


Commit: 8d61af14adc00477ec9271178f0eeaa3675efe45
    https://github.com/scummvm/scummvm/commit/8d61af14adc00477ec9271178f0eeaa3675efe45
Author: sluicebox (22204938+sluicebox at users.noreply.github.com)
Date: 2025-05-26T13:30:51-07:00

Commit Message:
AGI: Cleanup WORDS parser, dictionary structure

A little less allocation, a little more hashin'

- Removes over 100 rarely used Array objects.
- Fixes memory leak when using WORDS.TOK.EXTENDED.
- Validates more error conditions when parsing WORDS.TOK.

Changed paths:
    engines/agi/words.cpp
    engines/agi/words.h


diff --git a/engines/agi/words.cpp b/engines/agi/words.cpp
index 6889d025495..529d7a84470 100644
--- a/engines/agi/words.cpp
+++ b/engines/agi/words.cpp
@@ -47,22 +47,18 @@ int Words::loadDictionary_v1(Common::SeekableReadStream &stream) {
 	stream.seek(26 * 2, SEEK_CUR);
 	do {
 		// Read next word
-		for (k = 0; k < (int)sizeof(str) - 1; k++) {
+		for (k = 0; k < ARRAYSIZE(str) - 1; k++) {
 			str[k] = stream.readByte();
 			if (str[k] == 0 || (uint8)str[k] == 0xFF)
 				break;
 		}
 
-		// And store it in our internal dictionary
+		// Store word in dictionary
 		if (k > 0) {
-			WordEntry *newWord = new WordEntry;
-			byte firstCharNr = str[0] - 'a';
-
-			newWord->word = Common::String(str, k + 1); // myStrndup(str, k + 1);
-			newWord->id = stream.readUint16LE();
-
-			_dictionaryWords[firstCharNr].push_back(newWord);
-			debug(3, "'%s' (%d)", newWord->word.c_str(), newWord->id);
+			WordEntry newWord;
+			newWord.word = Common::String(str, k + 1);
+			newWord.id = stream.readUint16LE();
+			_dictionary[str[0]].push_back(newWord);
 		}
 	} while ((uint8)str[0] != 0xFF);
 
@@ -82,38 +78,46 @@ int Words::loadDictionary(const char *fname) {
 }
 
 int Words::loadDictionary(Common::SeekableReadStream &stream) {
-	// Loop through alphabet, as words in the dictionary file are sorted by
-	// first character
-	uint32 start = stream.pos();
-	char str[64] = { 0 };
-	char c;
+	// Read words for each letter (A-Z)
+	const uint32 start = stream.pos();
 	for (int i = 0; i < 26; i++) {
+		// Read letter index and seek to first word
 		stream.seek(start + i * 2);
 		int offset = stream.readUint16BE();
-		if (offset == 0)
-			continue;
+		if (offset == 0) {
+			continue; // no words
+		}
 		stream.seek(start + offset);
-		int k = stream.readByte();
-		while (!stream.eos() && !stream.err()) {
-			// Read next word
-			do {
+
+		// Read all words in this letter's section
+		char str[64] = { 0 };
+		int prevWordLength = 0;
+		int k = stream.readByte(); // copy-count of first word
+		while (!stream.eos() && !stream.err() && k <= prevWordLength) {
+			// Read word
+			char c = 0;
+			while (!(c & 0x80) && k < ARRAYSIZE(str) - 1) {
 				c = stream.readByte();
 				str[k++] = (c ^ 0x7F) & 0x7F;
-			} while (!(c & 0x80) && k < (int)sizeof(str) - 1);
+			}
 			str[k] = 0;
 
+			// Read word id
+			uint16 wordId = stream.readUint16BE();
+			if (stream.eos() || stream.err()) {
+				break;
+			}
+
 			// WORKAROUND:
 			// The SQ0 fan game stores words starting with numbers (like '7up')
 			// in its dictionary under the 'a' entry. We skip these.
 			// See bug #6415
 			if (str[0] == 'a' + i) {
-				// And store it in our internal dictionary
-				WordEntry *newWord = new WordEntry;
-				newWord->word = Common::String(str, k);
-				newWord->id = stream.readUint16BE();
-				_dictionaryWords[i].push_back(newWord);
-			} else {
-				stream.readUint16BE();
+				// Store word in dictionary
+				WordEntry newWord;
+				newWord.word = Common::String(str, k);
+				newWord.id = wordId
+				_dictionary[str[0]].push_back(newWord);
 			}
 
 			k = stream.readByte();
@@ -122,8 +126,10 @@ int Words::loadDictionary(Common::SeekableReadStream &stream) {
 			// WORKAROUND: We only break after already seeing words with the
 			// right prefix, for the SQ0 words starting with digits filed under
 			// 'a'. See above comment and bug #6415.
-			if (k == 0 && str[0] >= 'a' + i)
+			if (k == 0 && str[0] == 'a' + i) {
 				break;
+			}
+			prevWordLength = k;
 		}
 	}
 
@@ -146,16 +152,11 @@ int Words::loadExtendedDictionary(const char *sierraFname) {
 	fp.readString('\n');
 
 	while (!fp.eos() && !fp.err()) {
-		Common::String word = fp.readString();
-		uint16 id = atoi(fp.readString('\n').c_str());
-		if (!word.empty()) {
-			byte index = (byte)word[0] - 'a';
-			if (index < ARRAYSIZE(_dictionaryWords)) {
-				WordEntry *newWord = new WordEntry();
-				newWord->word = word;
-				newWord->id = id;
-				_dictionaryWords[index].push_back(newWord);
-			}
+		WordEntry newWord;
+		newWord.word = fp.readString();
+		newWord.id = atoi(fp.readString('\n').c_str());
+		if (!newWord.word.empty()) {
+			_dictionary[(byte)newWord.word[0]].push_back(newWord);
 		}
 	}
 
@@ -163,16 +164,7 @@ int Words::loadExtendedDictionary(const char *sierraFname) {
 }
 
 void Words::unloadDictionary() {
-	for (int16 firstCharNr = 0; firstCharNr < 26; firstCharNr++) {
-		Common::Array<WordEntry *> &dictionary = _dictionaryWords[firstCharNr];
-		int16 dictionarySize = dictionary.size();
-
-		for (int16 dictionaryWordNr = 0; dictionaryWordNr < dictionarySize; dictionaryWordNr++) {
-			delete dictionary[dictionaryWordNr];
-		}
-
-		_dictionaryWords[firstCharNr].clear();
-	}
+	_dictionary.clear();
 }
 
 void Words::clearEgoWords() {
@@ -270,12 +262,9 @@ int16 Words::findWordInDictionary(const Common::String &userInputLowercase, uint
 			}
 		}
 
-		Common::Array<WordEntry *> &dictionary = _dictionaryWords[firstChar - 'a'];
-		int16 dictionarySize = dictionary.size();
-
-		for (int16 dictionaryWordNr = 0; dictionaryWordNr < dictionarySize; dictionaryWordNr++) {
-			WordEntry *dictionaryEntry = dictionary[dictionaryWordNr];
-			uint16 dictionaryWordLen = dictionaryEntry->word.size();
+		const Common::Array<WordEntry> &words = _dictionary.getValOrDefault(firstChar);
+		for (const WordEntry &wordEntry : words) {
+			uint16 dictionaryWordLen = wordEntry.word.size();
 
 			if (dictionaryWordLen <= userInputLeft) {
 				// dictionary word is longer or same length as the remaining user input
@@ -285,7 +274,7 @@ int16 Words::findWordInDictionary(const Common::String &userInputLowercase, uint
 				userInputPos = wordStartPos;
 				while (curCompareLeft) {
 					byte curUserInputChar = userInputLowercase[userInputPos];
-					byte curDictionaryChar = dictionaryEntry->word[dictionaryWordPos];
+					byte curDictionaryChar = wordEntry.word[dictionaryWordPos];
 
 					if (curUserInputChar != curDictionaryChar)
 						break;
@@ -299,7 +288,7 @@ int16 Words::findWordInDictionary(const Common::String &userInputLowercase, uint
 					// check, if there is also nothing more of user input left or if a space the follow-up char?
 					if ((userInputPos >= userInputLen) || (userInputLowercase[userInputPos] == ' ')) {
 						// so fully matched, remember match
-						wordId = dictionaryEntry->id;
+						wordId = wordEntry.id;
 						foundWordLen = dictionaryWordLen;
 
 						// perfect match? -> exit loop
diff --git a/engines/agi/words.h b/engines/agi/words.h
index f85eb45f96a..a372ca1fe00 100644
--- a/engines/agi/words.h
+++ b/engines/agi/words.h
@@ -40,9 +40,10 @@ public:
 private:
 	AgiEngine *_vm;
 
-	// Dictionary
-	// 158 = 255 - 'a' ; that's allows us to support extended character set, and does no harm for regular English games
-	Common::Array<WordEntry *> _dictionaryWords[158];
+	// Dictionary of words in WORDS.TOK or WORDS.TOK.EXTENDED.
+	// key:   first character of the word
+	// value: words in the order they appear
+	Common::HashMap<byte, Common::Array<WordEntry>> _dictionary;
 
 	WordEntry _egoWords[MAX_WORDS];
 	uint16  _egoWordCount;


Commit: 33ede4811291cc866142ef807a579094a5062c94
    https://github.com/scummvm/scummvm/commit/33ede4811291cc866142ef807a579094a5062c94
Author: sluicebox (22204938+sluicebox at users.noreply.github.com)
Date: 2025-05-26T13:30:52-07:00

Commit Message:
AGI: Allow words to start with digits in fan games

These words appear in at least 24 fan games and one fan translation.

This does not change input parsing for games without these words.

Changed paths:
    engines/agi/words.cpp


diff --git a/engines/agi/words.cpp b/engines/agi/words.cpp
index 529d7a84470..ddb18a8c5fe 100644
--- a/engines/agi/words.cpp
+++ b/engines/agi/words.cpp
@@ -77,6 +77,16 @@ int Words::loadDictionary(const char *fname) {
 	return loadDictionary(fp);
 }
 
+/**
+ * Load all words from WORDS.TOK into the dictionary.
+ *
+ * Note that this parser handles words that start with a digit. These appear in
+ * fan games because AGI Studio allowed them and placed them at the start of the
+ * 'A' section. These words had no effect because the interpreter only matched
+ * user input that began with A-Z, and the matching logic happened to skip words
+ * until it reached one with the expected first letter. In the past, these words 
+ * caused problems for our parser. See bugs #6415, #15000
+ */
 int Words::loadDictionary(Common::SeekableReadStream &stream) {
 	// Read words for each letter (A-Z)
 	const uint32 start = stream.pos();
@@ -108,24 +118,17 @@ int Words::loadDictionary(Common::SeekableReadStream &stream) {
 				break;
 			}
 
-			// WORKAROUND:
-			// The SQ0 fan game stores words starting with numbers (like '7up')
-			// in its dictionary under the 'a' entry. We skip these.
-			// See bug #6415
-			if (str[0] == 'a' + i) {
-				// Store word in dictionary
-				WordEntry newWord;
-				newWord.word = Common::String(str, k);
-				newWord.id = wordId
-				_dictionary[str[0]].push_back(newWord);
-			}
+			// Store word in dictionary
+			WordEntry newWord;
+			newWord.word = Common::String(str, k);
+			newWord.id = wordId;
+			_dictionary[str[0]].push_back(newWord);
 
+			// Read next word's copy count, or this letter's zero terminator.
+			// Stop on zero if the word we read begins with the expected letter,
+			// otherwise this is a fan game and we just read a word that starts
+			// with a digit at the start of the 'A' section. Bugs #6413, #15000
 			k = stream.readByte();
-
-			// Are there more words with an already known prefix?
-			// WORKAROUND: We only break after already seeing words with the
-			// right prefix, for the SQ0 words starting with digits filed under
-			// 'a'. See above comment and bug #6415.
 			if (k == 0 && str[0] == 'a' + i) {
 				break;
 			}
@@ -252,8 +255,12 @@ int16 Words::findWordInDictionary(const Common::String &userInputLowercase, uint
 
 	const byte lastCharInAbc = _vm->getFeatures() & GF_EXTCHAR ? 0xff : 'z';
 
-	if ((firstChar >= 'a') && (firstChar <= lastCharInAbc)) {
-		// word has to start with a letter
+	// Words normally have to start with a letter.
+	// ENHANCEMENT: Fan games and translations include words that start with a
+	// digit, even though the original interpreter ignored them. We allow input
+	// words to start with a digit if the dictionary contains such a word.
+	if (('a' <= firstChar && firstChar <= lastCharInAbc) ||
+		('0' <= firstChar && firstChar <= '9' && _dictionary.contains(firstChar))) {
 		if (((userInputPos + 1) < userInputLen) && (userInputLowercase[userInputPos + 1] == ' ')) {
 			// current word is 1 char only?
 			if ((firstChar == 'a') || (firstChar == 'i')) {