[Scummvm-git-logs] scummvm master -> b89b946e7da70492b43d0f627b9ffa338814d410
fracturehill
noreply at scummvm.org
Tue Jan 16 21:31:17 UTC 2024
This automated email contains information about 1 new commit which has been
pushed to the 'scummvm' repo located at https://github.com/scummvm/scummvm .
Summary:
b89b946e7d COMMON: Allow extracting delimiter info from Tokenizer
Commit: b89b946e7da70492b43d0f627b9ffa338814d410
https://github.com/scummvm/scummvm/commit/b89b946e7da70492b43d0f627b9ffa338814d410
Author: Kaloyan Chehlarski (strahy at outlook.com)
Date: 2024-01-16T23:31:13+02:00
Commit Message:
COMMON: Allow extracting delimiter info from Tokenizer
Extended the StringTokenizer and U32StringTokenizer
classes with functions that provide the caller with a list
of all delimiters surrounding the last processed token.
Changed paths:
common/tokenizer.cpp
common/tokenizer.h
diff --git a/common/tokenizer.cpp b/common/tokenizer.cpp
index 45effce118a..79d79854e4f 100644
--- a/common/tokenizer.cpp
+++ b/common/tokenizer.cpp
@@ -61,6 +61,36 @@ StringArray StringTokenizer::split() {
return res;
}
+String StringTokenizer::delimitersAtTokenBegin() const {
+	// A token starting at offset 0 has nothing in front of it; this is also
+	// the state before any token has been extracted
+	if (_tokenBegin == 0)
+		return String();
+
+	// Walk left from the token start for as long as the character just before
+	// the cursor is a delimiter; stops at the previous token or at offset 0
+	int runStart = _tokenBegin;
+	while (runStart > 0 && _delimiters.contains(_str[runStart - 1]))
+		--runStart;
+
+	// The delimiter run occupies [runStart, _tokenBegin)
+	return String(_str.c_str() + runStart, _tokenBegin - runStart);
+}
+
+String StringTokenizer::delimitersAtTokenEnd() const {
+	// Nothing to report when no token has been extracted yet, or when the
+	// current token runs right up to the end of the input string
+	const bool nothingExtracted = (_tokenEnd == 0);
+	const bool tokenReachesEnd = (_tokenEnd == _str.size());
+	if (nothingExtracted || tokenReachesEnd)
+		return String();
+
+	// Step over every consecutive delimiter that follows the token; stops at
+	// the next token or at the end of the input string
+	uint runEnd = _tokenEnd;
+	while (runEnd < _str.size() && _delimiters.contains(_str[runEnd]))
+		++runEnd;
+
+	// The delimiter run occupies [_tokenEnd, runEnd)
+	return String(_str.c_str() + _tokenEnd, runEnd - _tokenEnd);
+}
+
U32StringTokenizer::U32StringTokenizer(const U32String &str, const String &delimiters) : _str(str), _delimiters(delimiters) {
reset();
}
@@ -85,18 +115,15 @@ U32String U32StringTokenizer::nextToken() {
// Skip delimiters when present at the beginning, to point to the next token
// For example, the below loop will set _tokenBegin & _tokenEnd to 'H' for the string -> "!!--=Hello World"
// And subsequently, skip all delimiters in the beginning of the next word.
- while (_tokenBegin != _str.end() && _delimiters.contains(*_tokenBegin)) {
+ _tokenBegin = _tokenEnd;
+ while (_tokenBegin != _str.end() && _delimiters.contains(*_tokenBegin))
_tokenBegin++;
- _tokenEnd++;
- }
+ _tokenEnd = _tokenBegin;
// Loop and advance _tokenEnd until we find a delimiter at the end of a word/string
while (_tokenBegin != _str.end() && _tokenEnd != _str.end()) {
if (_delimiters.contains(*_tokenEnd)) {
- U32String token(_tokenBegin, _tokenEnd);
- _tokenEnd++;
- _tokenBegin = _tokenEnd;
- return token;
+ return U32String(_tokenBegin, _tokenEnd);
}
_tokenEnd++;
}
@@ -117,5 +144,35 @@ U32StringArray U32StringTokenizer::split() {
return res;
}
+U32String U32StringTokenizer::delimitersAtTokenBegin() const {
+	// Returns the run of delimiter characters immediately preceding the current token.
+	// First token appears at beginning of the string, or no tokens have been extracted yet
+	if (_tokenBegin == _str.begin())
+		return U32String();
+
+	// Iterate backwards until we hit either the previous token, or the beginning of the
+	// input string. We peek at the character *before* the cursor instead of stepping first,
+	// so the iterator is never moved to a position before _str.begin() (doing so, and then
+	// comparing it with >=, is undefined behaviour).
+	U32String::const_iterator delimitersBegin = _tokenBegin;
+	while (delimitersBegin != _str.begin() && _delimiters.contains(*(delimitersBegin - 1)))
+		--delimitersBegin;
+
+	// Return the delimiters, i.e. the range [delimitersBegin, _tokenBegin)
+	return U32String(delimitersBegin, _tokenBegin - delimitersBegin);
+}
+
+U32String U32StringTokenizer::delimitersAtTokenEnd() const {
+	// Returns the run of delimiter characters immediately following the current token.
+	// Last token appears at end of the string, or no tokens have been extracted yet
+	if (_tokenEnd == _str.begin() || _tokenEnd == _str.end())
+		return U32String(); // same type as the declared return value, matching delimitersAtTokenBegin()
+
+	// Iterate forwards until we hit either the next token, or the end of the input string
+	U32String::const_iterator delimitersEnd = _tokenEnd;
+	while (delimitersEnd != _str.end() && _delimiters.contains(*delimitersEnd))
+		++delimitersEnd;
+
+	// Return the delimiters, i.e. the range [_tokenEnd, delimitersEnd)
+	return U32String(_tokenEnd, delimitersEnd - _tokenEnd);
+}
+
} // End of namespace Common
diff --git a/common/tokenizer.h b/common/tokenizer.h
index 009b2edb9d8..b5d8fb1f24a 100644
--- a/common/tokenizer.h
+++ b/common/tokenizer.h
@@ -56,6 +56,9 @@ public:
String nextToken(); ///< Returns the next token from the string (Or an empty string if there are no more tokens)
StringArray split(); ///< Returns StringArray with all tokens. Beware of the memory usage
+ String delimitersAtTokenBegin() const; ///< Returns a String with all delimiters between the current and previous token
+ String delimitersAtTokenEnd() const; ///< Returns a String with all delimiters between the current and next token
+
private:
const String _str; ///< The string to be tokenized
const String _delimiters; ///< String containing all the delimiter characters
@@ -84,6 +87,9 @@ public:
U32String nextToken(); ///< Returns the next token from the string (Or an empty string if there are no more tokens)
U32StringArray split(); ///< Returns StringArray with all tokens. Beware of the memory usage
+ U32String delimitersAtTokenBegin() const; ///< Returns a U32String with all delimiters between the current and previous token
+ U32String delimitersAtTokenEnd() const; ///< Returns a U32String with all delimiters between the current and next token
+
private:
const U32String _str; ///< The unicode string to be tokenized
const String _delimiters; ///< String containing all the delimiter characters
More information about the Scummvm-git-logs
mailing list