Allow underscore separator for numbers

This commit is contained in:
Rafał Grodziński
2025-06-22 13:47:11 +09:00
parent 47293b4cf5
commit 8524883791
3 changed files with 30 additions and 18 deletions

View File

@@ -4,7 +4,7 @@ project(
VERSION 0.1.0 VERSION 0.1.0
LANGUAGES CXX C) LANGUAGES CXX C)
set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD 20)
set(CMAKE_COLOR_DIAGNOSTICS OFF) set(CMAKE_COLOR_DIAGNOSTICS OFF)
find_package(LLVM REQUIRED CONFIG) find_package(LLVM REQUIRED CONFIG)

View File

@@ -271,10 +271,12 @@ shared_ptr<Token> Lexer::match(TokenKind kind, string lexme, bool needsSeparator
shared_ptr<Token> Lexer::matchIntegerDec() { shared_ptr<Token> Lexer::matchIntegerDec() {
int nextIndex = currentIndex; int nextIndex = currentIndex;
while (nextIndex < source.length() && isDecDigit(nextIndex)) // Include _ which is not on the first position
while (nextIndex < source.length() && (isDecDigit(nextIndex) || (nextIndex > currentIndex && source.at(nextIndex) == '_')))
nextIndex++; nextIndex++;
if (nextIndex == currentIndex || !isSeparator(nextIndex)) // Resulting number shouldn't be empty, should be separated on the right, and _ shouldn't be the last character
if (nextIndex == currentIndex || !isSeparator(nextIndex) || source.at(nextIndex-1) == '_')
return nullptr; return nullptr;
string lexme = source.substr(currentIndex, nextIndex - currentIndex); string lexme = source.substr(currentIndex, nextIndex - currentIndex);
@@ -293,10 +295,12 @@ shared_ptr<Token> Lexer::matchIntegerHex() {
if (source.at(nextIndex++) != '0' || source.at(nextIndex++) != 'x') if (source.at(nextIndex++) != '0' || source.at(nextIndex++) != 'x')
return nullptr; return nullptr;
while (nextIndex < source.length() && isHexDigit(nextIndex)) // Include _ which is not on the first position
while (nextIndex < source.length() && (isHexDigit(nextIndex) || (nextIndex > currentIndex+2 && source.at(nextIndex) == '_')))
nextIndex++; nextIndex++;
if (nextIndex == currentIndex || !isSeparator(nextIndex)) // Resulting number shouldn't be empty, should be separated on the right, and _ shouldn't be the last character
if (nextIndex == currentIndex+2 || !isSeparator(nextIndex) || source.at(nextIndex-1) == '_')
return nullptr; return nullptr;
string lexme = source.substr(currentIndex, nextIndex - currentIndex); string lexme = source.substr(currentIndex, nextIndex - currentIndex);
@@ -315,10 +319,12 @@ shared_ptr<Token> Lexer::matchIntegerBin() {
if (source.at(nextIndex++) != '0' || source.at(nextIndex++) != 'b') if (source.at(nextIndex++) != '0' || source.at(nextIndex++) != 'b')
return nullptr; return nullptr;
while (nextIndex < source.length() && isBinDigit(nextIndex)) // Include _ which is not on the first position
while (nextIndex < source.length() && (isBinDigit(nextIndex) || (nextIndex > currentIndex+2 && source.at(nextIndex) == '_')))
nextIndex++; nextIndex++;
if (nextIndex == currentIndex || !isSeparator(nextIndex)) // Resulting number shouldn't be empty, should be separated on the right, and _ shouldn't be the last character
if (nextIndex == currentIndex+2 || !isSeparator(nextIndex) || source.at(nextIndex-1) == '_')
return nullptr; return nullptr;
string lexme = source.substr(currentIndex, nextIndex - currentIndex); string lexme = source.substr(currentIndex, nextIndex - currentIndex);

View File

@@ -130,22 +130,28 @@ Expression(ExpressionKind::LITERAL, ValueType::NONE) {
boolValue = token->getLexme().compare("true") == 0; boolValue = token->getLexme().compare("true") == 0;
valueType = ValueType::BOOL; valueType = ValueType::BOOL;
break; break;
case TokenKind::INTEGER_DEC: case TokenKind::INTEGER_DEC: {
sint32Value = stoi(token->getLexme(), nullptr, 10); string numString = token->getLexme();
erase(numString, '_');
sint32Value = stoi(numString, nullptr, 10);
valueType = ValueType::SINT32; valueType = ValueType::SINT32;
break; break;
case TokenKind::INTEGER_HEX: }
sint32Value = stoi(token->getLexme(), nullptr, 16); case TokenKind::INTEGER_HEX: {
string numString = token->getLexme();
erase(numString, '_');
sint32Value = stoi(numString, nullptr, 16);
valueType = ValueType::SINT32; valueType = ValueType::SINT32;
break; break;
case TokenKind::INTEGER_BIN: }
sint32Value = stoi( case TokenKind::INTEGER_BIN: {
token->getLexme().substr(2, token->getLexme().size()-1), string numString = token->getLexme();
nullptr, erase(numString, '_');
2 numString = numString.substr(2, numString.size()-1);
); sint32Value = stoi(numString, nullptr, 2);
valueType = ValueType::SINT32; valueType = ValueType::SINT32;
break; break;
}
case TokenKind::REAL: case TokenKind::REAL:
real32Value = stof(token->getLexme()); real32Value = stof(token->getLexme());
valueType = ValueType::REAL32; valueType = ValueType::REAL32;