#include <iostream>
#include <regex>
#include <map>
#include "lex.h"
/**
 * Lookup table from each Token value to the name that operator<< prints for it.
 */
static std::map<Token, std::string> tokenPrint = {
    // Keywords
    {PRINT, "PRINT"},
    {IF, "IF"},
    {BEGIN, "BEGIN"},
    {END, "END"},
    {THEN, "THEN"},
    // Identifiers and constants
    {IDENT, "IDENT"},
    {ICONST, "ICONST"},
    {SCONST, "SCONST"},
    {RCONST, "RCONST"},
    // Operators
    {PLUS, "PLUS"},
    {MINUS, "MINUS"},
    {MULT, "MULT"},
    {DIV, "DIV"},
    {EQ, "EQ"},
    // Delimiters
    {LPAREN, "LPAREN"},
    {RPAREN, "RPAREN"},
    {SCOMA, "SCOMA"},
    {COMA, "COMA"},
    // Scanner status
    {ERR, "ERR"},
    {DONE, "DONE"}
};
/**
 * Stream-insertion operator for LexItem.
 *
 * Writes the token's name (looked up in tokenPrint) to `out`. For tokens that
 * carry meaningful text (SCONST, RCONST, ICONST, IDENT and ERR) the lexeme is
 * appended in parentheses, e.g. `IDENT (x)`.
 *
 * @param out  stream to write to
 * @param tok  token to print
 * @return     `out`, so that insertions can be chained
 */
ostream& operator<<(ostream& out, const LexItem& tok) {
    const Token token = tok.GetToken();

    // find() rather than operator[] so that an unknown token does not insert
    // an empty entry into the table; an unknown token prints no name.
    const auto entry = tokenPrint.find(token);
    if (entry != tokenPrint.end())
        out << entry->second;

    const bool showLexeme = token == SCONST || token == RCONST ||
                            token == ICONST || token == IDENT ||
                            token == ERR;
    // Write to the stream that was passed in, not unconditionally to std::cout.
    if (showLexeme)
        out << " (" << tok.GetLexeme() << ")";
    return out;
}
/**
 * Scanner state shared across calls to getNextToken.
 *
 * currentToken holds the token that getNextToken is about to return; it is
 * copied into previousToken just before a successful return.
 * previousToken is the last token that was successfully returned. getNextToken
 * consults it to reject invalid arrangements, such as input that does not start
 * with a begin token or an operator that does not follow an operand.
 *
 * NOTE(review): getNextToken treats `previousToken == ERR` as "no token has
 * been returned yet", so it presumably relies on a default-constructed LexItem
 * comparing equal to ERR - confirm against lex.h.
 */
LexItem currentToken;
LexItem previousToken;
/**
 * Scan `in` and return the next token.
 *
 * The scanner is a small state machine that consumes one character at a time.
 * Besides recognising lexemes it also validates token order against the
 * file-level `previousToken`; every successfully returned token is stored in
 * `currentToken` and `previousToken`.
 *
 * @param in       stream to read from; a character that ends a lexeme is put
 *                 back so the next call sees it again
 * @param linenum  current line number; incremented for every '\n' consumed in
 *                 the START and INCOMMENT states
 * @return         the recognised token, an ERR item describing the problem
 *                 (unclassifiable character, bad token order, malformed
 *                 number, multi-line string, missing begin/end), or DONE when
 *                 the input is exhausted
 *
 * NOTE(review): `previousToken == ERR` is used as "nothing has been returned
 * yet"; this presumably relies on a default-constructed LexItem comparing equal
 * to ERR - confirm against lex.h.
 */
LexItem getNextToken(istream& in, int& linenum) {
    enum TokenState { START, INID, INSTRING, ININT, INREAL, INCOMMENT, SIGN } lexstate = START;

    // Compiled once instead of once per input character.
    static const std::regex identifierPattern("[a-zA-Z][a-zA-Z0-9]*");
    static const std::regex openStringPattern("\"[ -~]*");
    static const std::regex closedStringPattern("\"[ -~]*\"");
    static const std::regex integerPattern("[0-9]+");
    static const std::regex realPrefixPattern("[0-9]*\\.[0-9]*");

    const auto endOfInput = std::istream::traits_type::eof();

    // The <cctype> classifiers are undefined for negative values, so the
    // character is converted to unsigned char first.
    const auto isSpace = [](char c) { return std::isspace(static_cast<unsigned char>(c)) != 0; };
    const auto isDigit = [](char c) { return std::isdigit(static_cast<unsigned char>(c)) != 0; };
    const auto isAlpha = [](char c) { return std::isalpha(static_cast<unsigned char>(c)) != 0; };

    // Records `item` as the most recently returned token and hands it back.
    const auto accept = [](const LexItem& item) {
        currentToken = item;
        previousToken = currentToken;
        return currentToken;
    };

    std::string lexeme;
    char character;

    // Search until a token is found or the input runs out.
    while (in.get(character)) {
        switch (lexstate) {
        // Basic state: looking for the first character of a token.
        case START:
            if (character == '\n')
                linenum++;
            // The last character of the input ends the scan; the program must
            // have been closed by an END token at that point.
            if (in.peek() == endOfInput) {
                if (previousToken.GetToken() != END)
                    return LexItem(ERR, "No END Token", previousToken.GetLinenum());
                return LexItem(DONE, lexeme, linenum);
            }
            // Whitespace carries no meaning and is skipped.
            if (isSpace(character))
                continue;
            lexeme = character;
            // "//" starts a comment.
            if (character == '/' && static_cast<char>(in.peek()) == '/') {
                lexstate = INCOMMENT;
                continue;
            }
            // Operators and delimiters.
            if (character == '+' || character == '-' || character == '*' ||
                character == '/' || character == '(' || character == ')' ||
                character == '=' || character == ',' || character == ';') {
                lexstate = SIGN;
                continue;
            }
            // String literal.
            if (character == '\"') {
                lexstate = INSTRING;
                continue;
            }
            // Integer (may later turn out to be a real).
            if (isDigit(character)) {
                lexstate = ININT;
                continue;
            }
            // Real written with a leading '.'.
            if (character == '.') {
                lexstate = INREAL;
                continue;
            }
            // Identifier or keyword.
            if (isAlpha(character)) {
                lexstate = INID;
                continue;
            }
            // Anything that cannot be classified is an error.
            return LexItem(ERR, lexeme, linenum);

        case INID: {
            // Extend the identifier while the next character keeps it well formed.
            const bool extendsIdentifier =
                std::regex_match(lexeme + character, identifierPattern);
            if (extendsIdentifier)
                lexeme += character;
            // The identifier ends at end of input or at the first character that
            // cannot belong to it.
            if (in.peek() == endOfInput || !extendsIdentifier) {
                in.putback(character);
                // Reserved words take precedence over plain identifiers.
                if (lexeme == "begin") {
                    // begin is only valid as the very first token.
                    if (previousToken.GetToken() != ERR)
                        return LexItem(ERR, lexeme, linenum);
                    currentToken = LexItem(BEGIN, lexeme, linenum);
                }
                else if (lexeme == "print")
                    currentToken = LexItem(PRINT, lexeme, linenum);
                else if (lexeme == "end") {
                    // end must directly follow a semicolon.
                    if (previousToken.GetToken() != SCOMA)
                        return LexItem(ERR, previousToken.GetLexeme(), linenum);
                    currentToken = LexItem(END, lexeme, linenum);
                }
                else if (lexeme == "if")
                    currentToken = LexItem(IF, lexeme, linenum);
                else if (lexeme == "then")
                    currentToken = LexItem(THEN, lexeme, linenum);
                else {
                    // Two identifiers in a row are not allowed.
                    if (previousToken.GetToken() == IDENT)
                        return LexItem(ERR, lexeme, linenum);
                    currentToken = LexItem(IDENT, lexeme, linenum);
                }
                // Any first token other than begin is an error.
                if (currentToken != BEGIN && previousToken == ERR)
                    return LexItem(ERR, "No BEGIN Token", currentToken.GetLinenum());
                previousToken = currentToken;
                return currentToken;
            }
            break;
        }

        case INSTRING:
            // A string cannot be the first token.
            if (previousToken == ERR)
                return LexItem(ERR, "No Begin Token", linenum);
            // A string may not span multiple lines.
            if (character == '\n')
                return LexItem(ERR, lexeme, linenum);
            // Still inside the string: collect printable characters.
            if (std::regex_match(lexeme + character, openStringPattern)) {
                if (character == '\\' && in.peek() == '\"') {
                    // An escaped quote is consumed as a pair so that it does not
                    // terminate the string.
                    lexeme += character;
                    in.get(character);
                    lexeme += character;
                    continue;
                }
                else
                    lexeme += character;
            }
            // A closing quote was just appended: the string is complete.
            if (std::regex_match(lexeme + character, closedStringPattern))
                return accept(LexItem(SCONST, lexeme, linenum));
            break;

        case ININT:
            // A number cannot be the first token.
            if (previousToken == ERR)
                return LexItem(ERR, "No Begin Token", linenum);
            // A letter directly after the digits is an error.
            if (isAlpha(character))
                return LexItem(ERR, lexeme + character, linenum);
            if (std::regex_match(lexeme + character, integerPattern)) {
                lexeme += character;
            }
            // A period means the number is actually a real; let INREAL re-read it.
            else if (character == '.') {
                lexstate = INREAL;
                in.putback(character);
                continue;
            }
            else {
                in.putback(character);
                return accept(LexItem(ICONST, lexeme, linenum));
            }
            break;

        case INREAL:
            // A number cannot be the first token.
            if (previousToken == ERR)
                return LexItem(ERR, "No Begin Token", linenum);
            // A letter directly after the number is an error.
            if (isAlpha(character))
                return LexItem(ERR, lexeme + character, linenum);
            // Digits and a single '.' keep extending the literal.
            if (std::regex_match(lexeme + character, realPrefixPattern)) {
                lexeme += character;
            }
            else {
                // At least one digit is required after the period.
                if (lexeme.back() == '.')
                    return LexItem(ERR, lexeme, linenum);
                in.putback(character);
                return accept(LexItem(RCONST, lexeme, linenum));
            }
            break;

        case INCOMMENT:
            // A comment is not a token; skip to the end of the line.
            if (character == '\n') {
                linenum++;
                lexstate = START;
            }
            continue;

        case SIGN: {
            // An operator or delimiter cannot be the first token.
            if (previousToken == ERR)
                return LexItem(ERR, "No Begin Token", linenum);

            // Each sign is only valid after particular kinds of token.
            const Token prev = previousToken.GetToken();
            const bool afterOperand = prev == IDENT || prev == ICONST || prev == RCONST;

            Token kind = ERR;
            bool allowed = false;
            if (lexeme == "+") {
                kind = PLUS;
                allowed = afterOperand;
            }
            else if (lexeme == "*") {
                kind = MULT;
                allowed = afterOperand;
            }
            else if (lexeme == "/") {
                kind = DIV;
                allowed = afterOperand;
            }
            else if (lexeme == "-") {
                // A minus may additionally follow '='.
                kind = MINUS;
                allowed = afterOperand || prev == EQ;
            }
            else if (lexeme == "(") {
                kind = LPAREN;
                allowed = prev == IF || prev == EQ || prev == PLUS || prev == MINUS ||
                          prev == MULT || prev == DIV;
            }
            else if (lexeme == ")") {
                kind = RPAREN;
                allowed = afterOperand;
            }
            else if (lexeme == "=") {
                kind = EQ;
                allowed = prev == IDENT;
            }
            else if (lexeme == ",") {
                kind = COMA;
                allowed = prev == SCONST;
            }
            else if (lexeme == ";") {
                kind = SCOMA;
                allowed = prev == SCONST || afterOperand;
            }
            else {
                // Not reachable: START only enters SIGN for the characters above.
                break;
            }

            if (!allowed)
                return LexItem(ERR, lexeme + character, linenum);
            // `character` belongs to the next token; hand it back to the stream.
            in.putback(character);
            return accept(LexItem(kind, lexeme, linenum));
        }
        }
    }
    return LexItem(DONE, "", linenum);
}
// NOTE(review): the non-code text below was fused onto the closing-brace line of
// getNextToken in the original file; it is kept here as a comment so the line is valid C++.
// Write, Run & Share C++ code online using OneCompiler's C++ online compiler for free. It's one of the robust, feature-rich online compilers for C++ language, running on the latest version 17. Getting started with the OneCompiler's C++ compiler is simple and pretty fast. The editor shows sample boilerplate code when you choose language as C++ and start coding!
OneCompiler's C++ online compiler supports stdin, and users can give inputs to programs using the STDIN textbox under the I/O tab. The following is a sample program that takes a name as input and prints a greeting with that name.
#include <iostream>
#include <string>
using namespace std;
// Sample program: prompts for a name on standard input and greets the user.
int main()
{
    std::cout << "Enter name:";

    // Read the whole line so that names containing spaces are kept intact.
    std::string name;
    std::getline(std::cin, name);

    const std::string greeting = "Hello " + name;
    std::cout << greeting;
    return 0;
}
C++ is a widely used middle-level programming language.
Whenever you want to perform a set of operations based on a condition, If-Else is used.
if(conditional-expression) {
//code
}
else {
//code
}
You can also use if-else for nested Ifs and If-Else-If ladder when multiple conditions are to be performed on a single variable.
Switch is an alternative to If-Else-If ladder.
switch(conditional-expression){
case value1:
// code
break; // optional
case value2:
// code
break; // optional
......
default:
code to be executed when all the above cases are not matched;
}
For loop is used to iterate a set of statements based on a condition.
for(Initialization; Condition; Increment/decrement){
//code
}
While is also used to iterate a set of statements based on a condition. Usually while is preferred when the number of iterations is not known in advance.
while (condition) {
// code
}
Do-while is also used to iterate a set of statements based on a condition. It is mostly used when you need to execute the statements at least once.
do {
// code
} while (condition);
A function is a sub-routine which contains a set of statements. Usually functions are written when multiple calls are required to the same set of statements, which increases reusability and modularity. A function runs only when it is called.
return_type function_name(parameters);
function_name (parameters)
return_type function_name(parameters) {
// code
}