Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 51 additions & 6 deletions src/parser/lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1069,6 +1069,57 @@ bool Lexer::takeRParen() {
return false;
}

std::optional<std::string_view> Lexer::takeKeyword() {
if (curr) {
return std::nullopt;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you explain this part? It reads as "if there is a current token, ignore it and return nullopt". Why is that correct?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This goes away at the end of the patch sequence, but it is required for correctness at these intermediate steps. Let's say the lexer reaches an integer and eagerly lexes it into a token stored in curr. That integer may be followed by a keyword, but takeKeyword() should fail until takeU32() (or similar) has been called first. Without this check, takeKeyword() would succeed even though the preceding integer was never consumed.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see, thanks. Which is the PR where I can see the final step where it goes away, to get an idea for the direction?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

}
if (auto result = keyword(next())) {
index += result->span.size();
advance();
return result->span;
}
return std::nullopt;
}

// Consume the keyword that keyword() lexes from next(), but only when its
// text is exactly `expected`. Returns true after stepping past the keyword;
// returns false without advancing when there is no keyword or it differs.
bool Lexer::takeKeyword(std::string_view expected) {
  auto lexed = keyword(next());
  const bool matches = lexed && lexed->span == expected;
  if (!matches) {
    return false;
  }
  // The lexed span equals `expected`, so both have the same length.
  index += expected.size();
  advance();
  return true;
}

std::optional<uint64_t> Lexer::takeOffset() {
if (auto result = keyword(next())) {
if (result->span.substr(0, 7) != "offset="sv) {
return std::nullopt;
}
Lexer subLexer(result->span.substr(7));
if (auto o = subLexer.takeU64()) {
index += result->span.size();
advance();
return o;
}
}
return std::nullopt;
}

std::optional<uint32_t> Lexer::takeAlign() {
if (auto result = keyword(next())) {
if (result->span.substr(0, 6) != "align="sv) {
return std::nullopt;
}
Lexer subLexer(result->span.substr(6));
if (auto o = subLexer.takeU32()) {
index += result->span.size();
advance();
return o;
}
}
return std::nullopt;
}

void Lexer::lexToken() {
// TODO: Ensure we're getting the longest possible match.
Token tok;
Expand All @@ -1080,8 +1131,6 @@ void Lexer::lexToken() {
tok = Token{t->span, FloatTok{t->nanPayload, t->d}};
} else if (auto t = str(next())) {
tok = Token{t->span, StringTok{t->str}};
} else if (auto t = keyword(next())) {
tok = Token{t->span, KeywordTok{}};
} else {
// TODO: Do something about lexing errors.
curr = std::nullopt;
Expand Down Expand Up @@ -1163,10 +1212,6 @@ std::ostream& operator<<(std::ostream& os, const StringTok& tok) {
return os;
}

// Print a KeywordTok as the fixed label "keyword"; the token argument itself
// is not inspected.
std::ostream& operator<<(std::ostream& os, const KeywordTok&) {
  os << "keyword";
  return os;
}

std::ostream& operator<<(std::ostream& os, const Token& tok) {
std::visit([&](const auto& t) { os << t; }, tok.data);
return os << " \"" << tok.span << "\"";
Expand Down
84 changes: 5 additions & 79 deletions src/parser/lexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,27 +87,15 @@ struct StringTok {
friend std::ostream& operator<<(std::ostream&, const StringTok&);
};

// Tag type marking a token as a keyword. It holds no data members, so any
// two KeywordTok values compare equal.
struct KeywordTok {
  friend std::ostream& operator<<(std::ostream&, const KeywordTok&);

  bool operator==(const KeywordTok&) const {
    return true;
  }
};

struct Token {
using Data = std::variant<IdTok, IntTok, FloatTok, StringTok, KeywordTok>;
using Data = std::variant<IdTok, IntTok, FloatTok, StringTok>;
std::string_view span;
Data data;

// ====================
// Token classification
// ====================

// Returns this token's span when its data holds a KeywordTok, and an empty
// optional for any other alternative.
std::optional<std::string_view> getKeyword() const {
  if (!std::holds_alternative<KeywordTok>(data)) {
    return std::nullopt;
  }
  return span;
}

template<typename T> std::optional<T> getU() const;
template<typename T> std::optional<T> getS() const;
template<typename T> std::optional<T> getI() const;
Expand Down Expand Up @@ -187,77 +175,15 @@ struct Lexer {
return {};
}

// If the current token is a keyword, advance past it and return its text.
// Otherwise return an empty optional without advancing.
std::optional<std::string_view> takeKeyword() {
  if (!curr) {
    return std::nullopt;
  }
  auto text = curr->getKeyword();
  if (!text) {
    return std::nullopt;
  }
  advance();
  return *text;
}
// Consume the upcoming keyword and return its text; returns std::nullopt
// when no keyword is taken.
std::optional<std::string_view> takeKeyword();
// Consume the upcoming keyword only if its text equals `expected`; returns
// whether it was consumed.
bool takeKeyword(std::string_view expected);

std::optional<std::string_view> peekKeyword() {
return Lexer(*this).takeKeyword();
}

// Advance past the current token and return true when it is a keyword whose
// text equals `expected`; otherwise return false without advancing.
bool takeKeyword(std::string_view expected) {
  if (!curr) {
    return false;
  }
  auto text = curr->getKeyword();
  if (!text || *text != expected) {
    return false;
  }
  advance();
  return true;
}

std::optional<uint64_t> takeOffset() {
using namespace std::string_view_literals;
if (curr) {
if (auto keyword = curr->getKeyword()) {
if (keyword->substr(0, 7) != "offset="sv) {
return {};
}
Lexer subLexer(keyword->substr(7));
if (subLexer.empty()) {
return {};
}
if (auto o = subLexer.curr->getU<uint64_t>()) {
subLexer.advance();
if (subLexer.empty()) {
advance();
return o;
}
}
}
}
return std::nullopt;
}

std::optional<uint32_t> takeAlign() {
using namespace std::string_view_literals;
if (curr) {
if (auto keyword = curr->getKeyword()) {
if (keyword->substr(0, 6) != "align="sv) {
return {};
}
Lexer subLexer(keyword->substr(6));
if (subLexer.empty()) {
return {};
}
if (auto a = subLexer.curr->getU<uint32_t>()) {
subLexer.advance();
if (subLexer.empty()) {
advance();
return a;
}
}
}
}
return {};
}
// Consume an `offset=<n>` keyword and return the parsed value; returns
// std::nullopt when no such keyword is taken.
std::optional<uint64_t> takeOffset();
// Consume an `align=<n>` keyword and return the parsed value; returns
// std::nullopt when no such keyword is taken.
std::optional<uint32_t> takeAlign();

template<typename T> std::optional<T> takeU() {
if (curr) {
Expand Down