#include <iostream> #include <regex> #include <map> #include "lex.h" /** * Used in previous assignment to print value of token */ static std::map<Token, std::string> tokenPrint { {PRINT, "PRINT"}, {IF, "IF"}, {BEGIN, "BEGIN"}, {END, "END"}, {THEN, "THEN"}, {IDENT, "IDENT"}, {ICONST, "ICONST"}, {SCONST, "SCONST"}, {RCONST, "RCONST"}, {PLUS, "PLUS"}, {MINUS, "MINUS"}, {MULT, "MULT"}, {DIV, "DIV"}, {EQ, "EQ"}, {LPAREN, "LPAREN"}, {RPAREN, "RPAREN"}, {SCOMA, "SCOMA"}, {COMA, "COMA"}, {ERR, "ERR"}, {DONE, "DONE"} }; /** * Operator overloading << for Token object */ ostream& operator<<(ostream& out, const LexItem& tok) { std::string *token = &tokenPrint[tok.GetToken()]; std::cout << *token; bool eval = (tok.GetToken() == SCONST) || (tok.GetToken() == RCONST) || (tok.GetToken() == ICONST) || (tok.GetToken() == IDENT) || (tok.GetToken() == ERR); if (eval) std::cout << " (" << tok.GetLexeme() << ")"; return out; } /** * Current token is used to set previous token before returned * Previous token is used to compare to the current token and check * for invalid arangment such as no begin token. */ LexItem currentToken; LexItem previousToken; /** * Return the next token within a file */ LexItem getNextToken(istream& in, int& linenum) { enum TokenState { START, INID, INSTRING, ININT, INREAL, INCOMMENT, SIGN} lexstate = START; std::string lexeme; char character; char nextCharacter; // Search until a token is found or eof is reached. 
while (in.get(character)) { switch (lexstate) { // Basic state of searching for a token case START: if (character == '\n') linenum++; // If eof is found finish searching if (in.peek() == -1) { if (previousToken.GetToken() != END) return LexItem(ERR, "No END Token", previousToken.GetLinenum()); return LexItem(DONE, lexeme, linenum); } // Spaces are meaningless for token analysis and are skipped if (std::isspace(character)) continue; lexeme = character; // Check for comment with // if (character == '/' && char(in.peek()) == '/') { lexstate = INCOMMENT; continue; } // Check for signs if (character == '+' || character == '-' || character == '*' || character == '/' || character == '(' || character == ')' || character == '=' || character == ',' || character == ';') { lexstate = SIGN; continue; } // Check for string if (character == '\"') { lexstate = INSTRING; continue; } // Check for ints if (std::isdigit(character)) { lexstate = ININT; continue; } // Check for reals if (character == '.') { lexstate = INREAL; continue; } // Check for identifiers if (std::isalpha(character)) { lexstate = INID; continue; } // If a character cannot be classified into a state it must be an error return LexItem(ERR, lexeme, linenum); case INID: // Regex is used to match strings to proper formatting if (std::regex_match(lexeme + character, std::regex("[a-zA-Z][a-zA-Z0-9]*"))) lexeme += character; if (in.peek() == -1 || !std::regex_match(lexeme + character, std::regex("[a-zA-Z][a-zA-Z0-9]*"))) { lexstate = START; in.putback(character); // Check for reserved keywords as identifiers if (lexeme == "begin") { if (previousToken.GetToken() != ERR) return LexItem(ERR, lexeme, linenum); currentToken = LexItem(BEGIN, lexeme, linenum); } else if (lexeme == "print") currentToken = LexItem(PRINT, lexeme, linenum); else if (lexeme == "end") { if (previousToken.GetToken() != SCOMA) return LexItem(ERR, previousToken.GetLexeme(), linenum); currentToken = LexItem(END, lexeme, linenum); } else if (lexeme == 
"if") currentToken = LexItem(IF, lexeme, linenum); else if (lexeme == "then") currentToken = LexItem(THEN, lexeme, linenum); else { if (previousToken.GetToken() == IDENT) return LexItem(ERR, lexeme, linenum); currentToken = LexItem(IDENT, lexeme, linenum); } // Check for no begin token if (currentToken != BEGIN && previousToken == ERR) return LexItem(ERR, "No BEGIN Token", currentToken.GetLinenum()); previousToken = currentToken; return currentToken; } break; case INSTRING: // Check for no begin token if (previousToken == ERR) return LexItem(ERR, "No Begin Token", linenum); // String cannot contain multiple lines, must be an error if (character == 10) return LexItem(ERR, lexeme, linenum); // Check lexeme for unfished string if (std::regex_match(lexeme + character, std::regex("\"[ -~]*"))) { if (character == '\\' && in.peek() == '\"') { lexeme += character; in.get(character); lexeme += character; continue; } else lexeme += character; } // Check lexeme for finished string if (std::regex_match(lexeme + character, std::regex("\"[ -~]*\""))) { lexstate = START; currentToken = LexItem(SCONST, lexeme, linenum); previousToken = currentToken; return currentToken; } break; case ININT: // Check for no begin token if (previousToken == ERR) return LexItem(ERR, "No Begin Token", linenum); // Checks if an alpha character is next to an integer number if (std::isalpha(character)) return LexItem(ERR, lexeme + character, linenum); if (std::regex_match(lexeme + character, std::regex("[0-9]+"))) { lexeme += character; } // If a period is found then the int is actual a real number else if(character == '.') { lexstate = INREAL; in.putback(character); continue; } else { lexstate = START; in.putback(character); currentToken = LexItem(ICONST, lexeme, linenum); previousToken = currentToken; return currentToken; } break; case INREAL: // Check for no begin token if (previousToken == ERR) return LexItem(ERR, "No Begin Token", linenum); // Checks if an alpha character is next to a real number if 
(std::isalpha(character)) return LexItem(ERR, lexeme + character, linenum); if (std::regex_match(lexeme + character, std::regex("[0-9]*\\.[0-9]+"))) { lexeme += character; } else if (std::regex_match(lexeme + character, std::regex("[0-9]*\\.[0-9]*"))) { lexeme += character; } else { if (lexeme[lexeme.length() - 1] == '.') return LexItem(ERR, lexeme, linenum); lexstate = START; in.putback(character); currentToken = LexItem(RCONST, lexeme, linenum); previousToken = currentToken; return currentToken; } break; case INCOMMENT: // Because comment is not a token it can be ignored if (character == '\n') { linenum++; lexstate = START; } continue; case SIGN: // Check for no begin token if (previousToken == ERR) return LexItem(ERR, "No Begin Token", linenum); /** * Signs are a little more complex and have to be handled individually * for the most part. Each sign can have a differnet type of token * in front of it requiring differnt checks. */ if (lexeme == "+" || lexeme == "*" || lexeme == "/") { Token token = previousToken.GetToken(); if (token == IDENT || token == ICONST || token == RCONST) { lexstate = START; in.putback(character); if (lexeme == "+") currentToken = LexItem(PLUS, lexeme, linenum); else if (lexeme == "*") currentToken = LexItem(MULT, lexeme, linenum); else currentToken = LexItem(DIV, lexeme, linenum); previousToken = currentToken; return currentToken; } else return LexItem(ERR, lexeme + character, linenum); } if (lexeme == "-") { Token token = previousToken.GetToken(); if (token == IDENT || token == ICONST || token == RCONST || token == EQ) { lexstate = START; in.putback(character); currentToken = LexItem(MINUS, lexeme, linenum); previousToken = currentToken; return currentToken; } else return LexItem(ERR, lexeme + character, linenum); } if (lexeme == "(") { Token token = previousToken.GetToken(); if (token == IF || token == EQ || token == PLUS || token == MINUS || token == MULT || token == DIV) { lexstate = START; in.putback(character); currentToken = 
LexItem(LPAREN, lexeme, linenum); previousToken = currentToken; return currentToken; } else return LexItem(ERR, lexeme + character, linenum); } if (lexeme == ")") { Token token = previousToken.GetToken(); if (token == ICONST || token == RCONST || token == IDENT) { lexstate = START; in.putback(character); currentToken = LexItem(RPAREN, lexeme, linenum); previousToken = currentToken; return currentToken; } else return LexItem(ERR, lexeme + character, linenum); } if (lexeme == "=") { Token token = previousToken.GetToken(); if (token == IDENT) { lexstate = START; in.putback(character); currentToken = LexItem(EQ, lexeme, linenum); previousToken = currentToken; return currentToken; } else return LexItem(ERR, lexeme + character, linenum); } if (lexeme == ",") { Token token = previousToken.GetToken(); if (token == SCONST) { lexstate = START; in.putback(character); currentToken = LexItem(COMA, lexeme, linenum); previousToken = currentToken; return currentToken; } else return LexItem(ERR, lexeme + character, linenum); } if (lexeme == ";") { Token token = previousToken.GetToken(); if (token == SCONST || token == ICONST || token == RCONST || token == IDENT) { lexstate = START; in.putback(character); currentToken = LexItem(SCOMA, lexeme, linenum); previousToken = currentToken; return currentToken; } else return LexItem(ERR, lexeme + character, linenum); } break; } } return LexItem(DONE, "", linenum); }
Write, Run & Share C++ code online using OneCompiler's C++ online compiler for free. It's one of the robust, feature-rich online compilers for C++ language, running on the latest version 17. Getting started with the OneCompiler's C++ compiler is simple and pretty fast. The editor shows sample boilerplate code when you choose language as C++
and start coding!
OneCompiler's C++ online compiler supports stdin, and users can give input to programs using the STDIN textbox under the I/O tab. The following is a sample program that takes a name as input and prints a greeting with that name.
#include <iostream>
#include <string>
using namespace std;
// Sample program: prompts for a name on standard input and greets the user.
int main()
{
    std::cout << "Enter name:";

    // Read the whole line so that names containing spaces are kept intact.
    std::string userName;
    std::getline(std::cin, userName);

    std::cout << "Hello " << userName;
    return 0;
}
C++ is a widely used middle-level programming language.
Whenever you want to perform a set of operations based on a condition, if-else is used.
if(conditional-expression) {
//code
}
else {
//code
}
You can also nest if-else statements, or chain them into an if-else-if ladder, when multiple conditions must be tested on a single variable.
Switch is an alternative to If-Else-If ladder.
switch(conditional-expression){
case value1:
// code
break; // optional
case value2:
// code
break; // optional
......
default:
code to be executed when all the above cases are not matched;
}
For loop is used to iterate a set of statements based on a condition.
for(Initialization; Condition; Increment/decrement){
//code
}
While is also used to iterate a set of statements based on a condition. It is usually preferred when the number of iterations is not known in advance.
while (condition) {
// code
}
Do-while is also used to iterate a set of statements based on a condition. It is mostly used when you need to execute the statements at least once.
do {
// code
} while (condition);
A function is a subroutine that contains a set of statements. Functions are usually written when the same set of statements needs to be called multiple times, which increases reusability and modularity. A function runs only when it is called.
return_type function_name(parameters);
function_name (parameters)
return_type function_name(parameters) {
// code
}