#include <iostream>
#include <regex>
#include <map>
#include "lex.h"

/**
* Lookup table that maps each listed Token to its printable name.
* Consulted by operator<< when a LexItem is written to a stream.
*/
static std::map<Token, std::string> tokenPrint = {
    // Keywords
    {PRINT,  "PRINT"},
    {IF,     "IF"},
    {BEGIN,  "BEGIN"},
    {END,    "END"},
    {THEN,   "THEN"},

    // Identifiers and constants
    {IDENT,  "IDENT"},
    {ICONST, "ICONST"},
    {SCONST, "SCONST"},
    {RCONST, "RCONST"},

    // Operators
    {PLUS,   "PLUS"},
    {MINUS,  "MINUS"},
    {MULT,   "MULT"},
    {DIV,    "DIV"},
    {EQ,     "EQ"},

    // Delimiters
    {LPAREN, "LPAREN"},
    {RPAREN, "RPAREN"},
    {SCOMA,  "SCOMA"},
    {COMA,   "COMA"},

    // Error and end-of-input markers
    {ERR,    "ERR"},
    {DONE,   "DONE"}
};

/**
* Stream insertion operator for LexItem.
*
* Writes the token's printable name and, for tokens whose text carries
* information (SCONST, RCONST, ICONST, IDENT and ERR), the lexeme in
* parentheses, e.g. "IDENT (x)".
*
* @param out Stream that receives the text.
* @param tok Item to print.
* @return out, so that insertions can be chained.
*/
ostream& operator<<(ostream& out, const LexItem& tok) {
    const Token kind = tok.GetToken();

    // find() instead of operator[]: printing must not insert into the table.
    // A token that is missing from the table prints no name.
    const auto entry = tokenPrint.find(kind);
    if (entry != tokenPrint.end())
        out << entry->second;

    const bool showLexeme = (kind == SCONST) || (kind == RCONST) ||
                            (kind == ICONST) || (kind == IDENT) ||
                            (kind == ERR);

    // Write to the caller's stream, not std::cout, so that files and
    // string streams receive the output as well.
    if (showLexeme)
        out << " (" << tok.GetLexeme() << ")";
    return out;
}

/**
* Lexer state shared between successive calls to getNextToken().
*
* currentToken holds the token that is about to be returned; it is copied
* into previousToken just before the return.
* previousToken is the last token that was successfully returned. It is
* compared with the token being recognized to detect invalid arrangements,
* such as a missing BEGIN token.
*
* NOTE(review): the "No BEGIN" checks test previousToken against ERR before any
* token has been returned, which presumes that a default-constructed LexItem
* carries ERR - confirm in lex.h.
*/
LexItem currentToken;
LexItem previousToken;

/**
* Return the next token within a file.
*
* Reads characters from `in` one at a time and runs a small state machine
* (START, INID, INSTRING, ININT, INREAL, INCOMMENT, SIGN) until a token can be
* returned. The function also reads and updates the file-level currentToken /
* previousToken, so the result depends on the tokens returned by earlier calls.
*
* @param in      Stream to read from; lookahead characters are handed back
*                with putback().
* @param linenum Line counter; incremented for every '\n' consumed in the
*                START and INCOMMENT states.
* @return The recognized token. An ERR item is returned for malformed input or
*         when an ordering check against previousToken fails (including
*         "No BEGIN Token" / "No END Token"). DONE is returned when the input
*         is exhausted.
*/
LexItem getNextToken(istream& in, int& linenum) {
// The state is local, so every call starts again in START.
enum TokenState { START, INID, INSTRING, ININT, INREAL, INCOMMENT, SIGN} lexstate = START;
std::string lexeme;
char character;
// NOTE(review): nextCharacter is never used in this function.
char nextCharacter;

// Search until a token is found or eof is reached.
while (in.get(character)) {
switch (lexstate) {
// Basic state of searching for a token
case START:
if (character == '\n')
linenum++;
  
// If eof is found finish searching.
// -1 is used as the end-of-file value of peek() (presumably equal to EOF).
// NOTE(review): this check runs after `character` was consumed but before it
// is classified, so the final character of the stream is never tokenized here.
if (in.peek() == -1) {
if (previousToken.GetToken() != END)
return LexItem(ERR, "No END Token", previousToken.GetLinenum());
return LexItem(DONE, lexeme, linenum);
}

// Spaces are meaningless for token analysis and are skipped
if (std::isspace(character))
continue;
  
// Start a new lexeme with the first significant character
lexeme = character;

// Check for comment with //
// The second '/' is not consumed here; INCOMMENT discards it.
if (character == '/' && char(in.peek()) == '/') {
lexstate = INCOMMENT;
continue;
}

// Check for signs
// The token itself is produced in the SIGN state on the next iteration.
if (character == '+' || character == '-' || character == '*' ||
character == '/' || character == '(' || character == ')' ||
character == '=' || character == ',' || character == ';') {
lexstate = SIGN;
continue;
}

// Check for string
if (character == '\"') {
lexstate = INSTRING;
continue;
}

// Check for ints
if (std::isdigit(character)) {
lexstate = ININT;
continue;
}

// Check for reals
// A leading '.' goes straight to INREAL with lexeme == "."
if (character == '.') {
lexstate = INREAL;
continue;
}

// Check for identifiers
if (std::isalpha(character)) {
lexstate = INID;
continue;
}
  
// If a character cannot be classified into a state it must be an error
return LexItem(ERR, lexeme, linenum);

case INID:
// Regex is used to match strings to proper formatting:
// extend the lexeme while it still forms a letter followed by letters/digits.
if (std::regex_match(lexeme + character, std::regex("[a-zA-Z][a-zA-Z0-9]*")))
lexeme += character;
// The identifier ends at end of input or at the first character that does not fit.
if (in.peek() == -1 || !std::regex_match(lexeme + character, std::regex("[a-zA-Z][a-zA-Z0-9]*"))) {
lexstate = START;
// NOTE(review): on the peek() == -1 path `character` may already have been
// appended to lexeme above and is still put back here.
in.putback(character);

// Check for reserved keywords as identifiers
if (lexeme == "begin") {
// "begin" is accepted only while previousToken still holds ERR
if (previousToken.GetToken() != ERR)
return LexItem(ERR, lexeme, linenum);
currentToken = LexItem(BEGIN, lexeme, linenum);
}
else if (lexeme == "print")
currentToken = LexItem(PRINT, lexeme, linenum);
else if (lexeme == "end") {
// "end" must directly follow a SCOMA token; otherwise the previous lexeme is reported
if (previousToken.GetToken() != SCOMA)
return LexItem(ERR, previousToken.GetLexeme(), linenum);
currentToken = LexItem(END, lexeme, linenum);
}
else if (lexeme == "if")
currentToken = LexItem(IF, lexeme, linenum);
else if (lexeme == "then")
currentToken = LexItem(THEN, lexeme, linenum);
else {
// Two identifiers in a row are rejected
if (previousToken.GetToken() == IDENT)
return LexItem(ERR, lexeme, linenum);
currentToken = LexItem(IDENT, lexeme, linenum);
}

// Check for no begin token
// (LexItem is compared with a Token here; presumably lex.h declares these operators.)
if (currentToken != BEGIN && previousToken == ERR)
return LexItem(ERR, "No BEGIN Token", currentToken.GetLinenum());
previousToken = currentToken;
return currentToken;
}
break;

case INSTRING:
// Check for no begin token
if (previousToken == ERR)
return LexItem(ERR, "No Begin Token", linenum);
// String cannot contain multiple lines, must be an error (10 is '\n' in ASCII)
if (character == 10)
return LexItem(ERR, lexeme, linenum);

// Check lexeme for unfinished string.
// Only characters in the range ' '..'~' are appended; others are not added to lexeme.
if (std::regex_match(lexeme + character, std::regex("\"[ -~]*"))) {
// A backslash followed by a quote is kept as two characters and does not end the string
if (character == '\\' && in.peek() == '\"') {
lexeme += character;
in.get(character);
lexeme += character;
continue;
}
else
lexeme += character;
}

// Check lexeme for finished string.
// `character` was already appended above, so a closing quote makes
// lexeme + character end in a quote and the pattern matches.
if (std::regex_match(lexeme + character, std::regex("\"[ -~]*\""))) {
lexstate = START;
currentToken = LexItem(SCONST, lexeme, linenum);
previousToken = currentToken;
return currentToken;
}
break;

case ININT:
// Check for no begin token
if (previousToken == ERR)
return LexItem(ERR, "No Begin Token", linenum);
// Checks if an alpha character is next to an integer number
if (std::isalpha(character))
return LexItem(ERR, lexeme + character, linenum);
// Another digit extends the integer
if (std::regex_match(lexeme + character, std::regex("[0-9]+"))) {
lexeme += character;
}
// If a period is found then the int is actually a real number;
// the '.' is put back so that INREAL reads it again.
else if(character == '.') {
lexstate = INREAL;
in.putback(character);
continue;
}
// Any other character ends the integer and is left in the stream
else {
lexstate = START;
in.putback(character);
currentToken = LexItem(ICONST, lexeme, linenum);
previousToken = currentToken;
return currentToken;
}
break;

case INREAL:
// Check for no begin token
if (previousToken == ERR)
return LexItem(ERR, "No Begin Token", linenum);
// Checks if an alpha character is next to a real number
if (std::isalpha(character))
return LexItem(ERR, lexeme + character, linenum);
// Digits after the point extend the real number
if (std::regex_match(lexeme + character, std::regex("[0-9]*\\.[0-9]+"))) {
lexeme += character;
}
// Accepts the '.' itself (no digits after the point yet)
else if (std::regex_match(lexeme + character, std::regex("[0-9]*\\.[0-9]*"))) {
lexeme += character;
}
else {
// A real number may not end with the decimal point
if (lexeme[lexeme.length() - 1] == '.')
return LexItem(ERR, lexeme, linenum);
lexstate = START;
in.putback(character);
currentToken = LexItem(RCONST, lexeme, linenum);
previousToken = currentToken;
return currentToken;
}
break;

case INCOMMENT:
// Because comment is not a token it can be ignored;
// the end of the line ends the comment.
if (character == '\n') {
linenum++;
lexstate = START;
}
continue;

case SIGN:
// Here lexeme holds the sign read in START and `character` is the lookahead
// that follows it; the lookahead is put back whenever a token is returned.
// Check for no begin token
if (previousToken == ERR)
return LexItem(ERR, "No Begin Token", linenum);
/**
* Signs are a little more complex and have to be handled individually
* for the most part. Each sign can have a different type of token
* in front of it requiring different checks.
*/
// '+', '*' and '/' must follow an identifier or a numeric constant
if (lexeme == "+" || lexeme == "*" || lexeme == "/") {
Token token = previousToken.GetToken();
if (token == IDENT || token == ICONST || token == RCONST) {
lexstate = START;
in.putback(character);
if (lexeme == "+")
currentToken = LexItem(PLUS, lexeme, linenum);
else if (lexeme == "*")
currentToken = LexItem(MULT, lexeme, linenum);
else
currentToken = LexItem(DIV, lexeme, linenum);
previousToken = currentToken;
return currentToken;
}
else
return LexItem(ERR, lexeme + character, linenum);
}
// '-' may additionally follow '='
if (lexeme == "-") {
Token token = previousToken.GetToken();
if (token == IDENT || token == ICONST || token == RCONST || token == EQ) {
lexstate = START;
in.putback(character);
currentToken = LexItem(MINUS, lexeme, linenum);
previousToken = currentToken;
return currentToken;
}
else
return LexItem(ERR, lexeme + character, linenum);
}
// '(' must follow IF, '=' or an arithmetic operator
if (lexeme == "(") {
Token token = previousToken.GetToken();
if (token == IF || token == EQ || token == PLUS || token == MINUS ||
token == MULT || token == DIV) {
lexstate = START;
in.putback(character);
currentToken = LexItem(LPAREN, lexeme, linenum);
previousToken = currentToken;
return currentToken;
}
else
return LexItem(ERR, lexeme + character, linenum);
}
// ')' must follow a numeric constant or an identifier
if (lexeme == ")") {
Token token = previousToken.GetToken();
if (token == ICONST || token == RCONST || token == IDENT) {
lexstate = START;
in.putback(character);
currentToken = LexItem(RPAREN, lexeme, linenum);
previousToken = currentToken;
return currentToken;
}
else
return LexItem(ERR, lexeme + character, linenum);
}
// '=' must follow an identifier
if (lexeme == "=") {
Token token = previousToken.GetToken();
if (token == IDENT) {
lexstate = START;
in.putback(character);
currentToken = LexItem(EQ, lexeme, linenum);
previousToken = currentToken;
return currentToken;
}
else
return LexItem(ERR, lexeme + character, linenum);
}
// ',' must follow a string constant
if (lexeme == ",") {
Token token = previousToken.GetToken();
if (token == SCONST) {
lexstate = START;
in.putback(character);
currentToken = LexItem(COMA, lexeme, linenum);
previousToken = currentToken;
return currentToken;
}
else
return LexItem(ERR, lexeme + character, linenum);
}
// ';' must follow a constant or an identifier
if (lexeme == ";") {
Token token = previousToken.GetToken();
if (token == SCONST || token == ICONST || token == RCONST || token == IDENT) {
lexstate = START;
in.putback(character);
currentToken = LexItem(SCOMA, lexeme, linenum);
previousToken = currentToken;
return currentToken;
}
else
return LexItem(ERR, lexeme + character, linenum);
}
break;
}   
}
// in.get() failed: nothing is left to read
return LexItem(DONE, "", linenum);
}

created 5 years ago

C++ Online Compiler

Write, Run & Share C++ code online using OneCompiler's C++ online compiler for free. It is a robust, feature-rich online compiler for the C++ language, running on the latest version, C++17. Getting started with OneCompiler's C++ compiler is simple and fast: the editor shows sample boilerplate code as soon as you choose C++ as the language, so you can start coding right away!

Read inputs from stdin

OneCompiler's C++ online compiler supports stdin, and users can give input to programs using the STDIN textbox under the I/O tab. The following sample program takes a name as input and prints a greeting with that name.

#include <iostream>
#include <string>
using namespace std;

int main() 
{
    string name;
    cout << "Enter name:";
    getline (cin, name);
    cout << "Hello " << name;
    return 0;
}

About C++

C++ is a widely used middle-level programming language.

Supports different platforms such as Windows, various Linux flavours, macOS, etc.
C++ supports OOP concepts such as Inheritance, Polymorphism, Encapsulation and Abstraction.
C++ is case-sensitive.
C++ is a compiled language.
C++ supports structured programming.
C++ provides a lot of built-in functions and also supports dynamic memory allocation.
Like C, C++ also allows you to work with memory directly using pointers.

Syntax help

Loops

1. If-Else:

Whenever you want to perform a set of operations based on a condition, If-Else is used.

if(conditional-expression) {
   //code
}
else {
   //code
}

You can also nest if-else statements, or chain them into an if-else-if ladder when several conditions have to be tested against a single variable.

2. Switch:

Switch is an alternative to If-Else-If ladder.

switch(conditional-expression){    
case value1:    
 // code    
 break;  // optional  
case value2:    
 // code    
 break;  // optional  
......    
    
default:     
 code to be executed when all the above cases are not matched;    
}

3. For:

For loop is used to iterate a set of statements based on a condition.

for(Initialization; Condition; Increment/decrement){  
  //code  
}

4. While:

While is also used to iterate a set of statements based on a condition. Usually while is preferred when the number of iterations is not known in advance.

while (condition) {  
// code 
}

5. Do-While:

Do-while is also used to iterate a set of statements based on a condition. It is mostly used when you need to execute the statements at least once.

do {  
 // code 
} while (condition);

Functions

A function is a subroutine that contains a set of statements. Functions are usually written when the same set of statements has to be called multiple times, which increases reusability and modularity. A function runs only when it is called.

How to declare a Function:

return_type function_name(parameters);

How to call a Function:

function_name (parameters)

How to define a Function:

return_type function_name(parameters) {  
 // code
}