Allow underscore separator for numbers

This commit is contained in:
Rafał Grodziński
2025-06-22 13:47:11 +09:00
parent 47293b4cf5
commit 8524883791
3 changed files with 30 additions and 18 deletions

View File

@@ -4,7 +4,7 @@ project(
VERSION 0.1.0
LANGUAGES CXX C)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_COLOR_DIAGNOSTICS OFF)
find_package(LLVM REQUIRED CONFIG)

View File

@@ -271,10 +271,12 @@ shared_ptr<Token> Lexer::match(TokenKind kind, string lexme, bool needsSeparator
shared_ptr<Token> Lexer::matchIntegerDec() {
int nextIndex = currentIndex;
while (nextIndex < source.length() && isDecDigit(nextIndex))
// Accept '_' as a digit separator, except in the first position
while (nextIndex < source.length() && (isDecDigit(nextIndex) || (nextIndex > currentIndex && source.at(nextIndex) == '_')))
nextIndex++;
if (nextIndex == currentIndex || !isSeparator(nextIndex))
// Resulting number shouldn't be empty, should be separated on the right, and _ shouldn't be the last character
if (nextIndex == currentIndex || !isSeparator(nextIndex) || source.at(nextIndex-1) == '_')
return nullptr;
string lexme = source.substr(currentIndex, nextIndex - currentIndex);
@@ -293,10 +295,12 @@ shared_ptr<Token> Lexer::matchIntegerHex() {
if (source.at(nextIndex++) != '0' || source.at(nextIndex++) != 'x')
return nullptr;
while (nextIndex < source.length() && isHexDigit(nextIndex))
// Accept '_' as a digit separator, except directly after the "0x" prefix
while (nextIndex < source.length() && (isHexDigit(nextIndex) || (nextIndex > currentIndex+2 && source.at(nextIndex) == '_')))
nextIndex++;
if (nextIndex == currentIndex || !isSeparator(nextIndex))
// Resulting number shouldn't be empty, should be separated on the right, and _ shouldn't be the last character
if (nextIndex == currentIndex+2 || !isSeparator(nextIndex) || source.at(nextIndex-1) == '_')
return nullptr;
string lexme = source.substr(currentIndex, nextIndex - currentIndex);
@@ -315,10 +319,12 @@ shared_ptr<Token> Lexer::matchIntegerBin() {
if (source.at(nextIndex++) != '0' || source.at(nextIndex++) != 'b')
return nullptr;
while (nextIndex < source.length() && isBinDigit(nextIndex))
// Accept '_' as a digit separator, except directly after the "0b" prefix
while (nextIndex < source.length() && (isBinDigit(nextIndex) || (nextIndex > currentIndex+2 && source.at(nextIndex) == '_')))
nextIndex++;
if (nextIndex == currentIndex || !isSeparator(nextIndex))
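// Resulting number shouldn't be empty, should be separated on the right, and _ shouldn't be the last character
// NOTE(review): unlike matchIntegerHex, the emptiness check below compares against currentIndex rather than currentIndex+2, so a bare "0b" with no digits is presumably not rejected here - confirm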
if (nextIndex == currentIndex || !isSeparator(nextIndex) || source.at(nextIndex-1) == '_')
return nullptr;
string lexme = source.substr(currentIndex, nextIndex - currentIndex);

View File

@@ -130,22 +130,28 @@ Expression(ExpressionKind::LITERAL, ValueType::NONE) {
boolValue = token->getLexme().compare("true") == 0;
valueType = ValueType::BOOL;
break;
case TokenKind::INTEGER_DEC:
sint32Value = stoi(token->getLexme(), nullptr, 10);
case TokenKind::INTEGER_DEC: {
string numString = token->getLexme();
erase(numString, '_');
sint32Value = stoi(numString, nullptr, 10);
valueType = ValueType::SINT32;
break;
case TokenKind::INTEGER_HEX:
sint32Value = stoi(token->getLexme(), nullptr, 16);
}
case TokenKind::INTEGER_HEX: {
string numString = token->getLexme();
erase(numString, '_');
sint32Value = stoi(numString, nullptr, 16);
valueType = ValueType::SINT32;
break;
case TokenKind::INTEGER_BIN:
sint32Value = stoi(
token->getLexme().substr(2, token->getLexme().size()-1),
nullptr,
2
);
}
case TokenKind::INTEGER_BIN: {
string numString = token->getLexme();
erase(numString, '_');
numString = numString.substr(2, numString.size()-1);
sint32Value = stoi(numString, nullptr, 2);
valueType = ValueType::SINT32;
break;
}
case TokenKind::REAL:
real32Value = stof(token->getLexme());
valueType = ValueType::REAL32;