#include <cmath>
#include <cstdint>
#include <cstring>
#include <ctime>     // clock(), used by main() when filling the benchmark inputs
#include <stdint.h>  // <cstdint> is preferred in C++, but stdint.h works.

#ifdef _MSC_VER
# include <intrin.h>
#else
# include <x86intrin.h>
#endif

    // Optional convenience wrapper for callers that prefer a named function
    // over writing __rdtsc() at every call site.
    // Returns whatever __rdtsc() reports, as a uint64_t.
    inline
    uint64_t readTSC() {
        // No serialization is done here. If stricter ordering is wanted, an
        // _mm_lfence() can be issued before the read (wait for earlier
        // instructions to retire) and/or after it (hold back later
        // instructions until rdtsc retires).
        return __rdtsc();
    }

    // Baseline kernel: element-wise unsigned 32-bit integer division over a
    // fixed batch of 1024 entries, val3[k] = val1[k] / val2[k].
    //   val1 - dividends (1024 elements are read)
    //   val2 - divisors  (1024 elements are read; a zero divisor is undefined behavior)
    //   val3 - receives the 1024 quotients
    // The __restrict__ qualifiers tell the compiler the three arrays do not overlap.
    void intTest(uint32_t *  const __restrict__  val1, uint32_t *  const __restrict__  val2, uint32_t *  const __restrict__  val3)
    {
        constexpr int kElements = 1024;
        for (int k = 0; k != kElements; ++k)
        {
            const uint32_t dividend = val1[k];
            const uint32_t divisor = val2[k];
            val3[k] = dividend / divisor; // plain scalar integer divide
        }
    }

    // Baseline kernel: element-wise unsigned 64-bit integer division over a
    // fixed batch of 1024 entries, val3[k] = val1[k] / val2[k].
    //   val1 - dividends (1024 elements are read)
    //   val2 - divisors  (1024 elements are read; a zero divisor is undefined behavior)
    //   val3 - receives the 1024 quotients
    // The __restrict__ qualifiers tell the compiler the three arrays do not overlap.
    void int64Test(uint64_t *  const __restrict__  val1, uint64_t *  const __restrict__  val2, uint64_t *  const __restrict__  val3)
    {
        constexpr int kElements = 1024;
        for (int k = 0; k != kElements; ++k)
        {
            const uint64_t dividend = val1[k];
            const uint64_t divisor = val2[k];
            val3[k] = dividend / divisor; // plain scalar integer divide
        }
    }

    // Emulates unsigned 32-bit integer division with double-precision
    // floating-point division over a fixed batch of 1024 entries:
    // val3[i] = floor(val1[i] / val2[i]).
    //   val1 - dividends (1024 elements are read)
    //   val2 - divisors  (1024 elements are read; must be non-zero, because the
    //          quotient would be inf/NaN and converting that to uint32_t is
    //          undefined behavior)
    //   val3 - receives the 1024 truncated quotients
    //
    // Why plain truncation is enough: every uint32_t is exactly representable
    // as a double, and a non-integral quotient a/b lies at least 1/b below the
    // next integer. That gap is far larger than the rounding error of one
    // correctly rounded double division, so the rounded quotient can never
    // reach the next integer and the float->int truncation yields the exact
    // integer quotient.
    //
    // No fractional "nudge" is applied on purpose: adding 0.01 whenever the
    // fractional part is below 0.99 rounds quotients such as 1699/100
    // (stored as slightly less than 16.99) up to 17.0 and returns 17.
    void intEmulationTest(uint32_t *  const __restrict__  val1, uint32_t *  const __restrict__  val2, uint32_t *  const __restrict__  val3)
    {
        for(int i=0;i<1024;i++)
        {
            const double dividend = val1[i]; // exact: 32 bits fit in the 53-bit significand
            const double divisor = val2[i];
            // Conversion to uint32_t truncates toward zero, i.e. floor for
            // these non-negative quotients.
            val3[i] = static_cast<uint32_t>(dividend / divisor);
        }
    }

    // "Magic" division: instead of converting each integer operand to float,
    // the raw 32 bits of the integer are copied straight into a float, so a
    // small integer ends up in the mantissa field (i.e. as a denormal). The two
    // floats are divided and only the quotient goes through a float->integer
    // conversion when it is stored.
    //   val1 - dividends (1024 elements are read)
    //   val2 - divisors  (1024 elements are read)
    //   val3 - receives the 1024 converted quotients
    // Author's notes, kept as stated:
    //   - up to 23 bits of integer payload should be ok
    //   - do not build with -ffast-math: it flushes these denormals to zero
    //   - "fp rounding mode: truncation" is required
    //   - do not divide by zero
    //   - reported as roughly a 10x speedup on the Zen2 architecture
    // The code assumes float has the same size as uint32_t (4 bytes).
    void intMagicTest(uint32_t *  const __restrict__  val1, uint32_t *  const __restrict__  val2, uint32_t *  const __restrict__  val3)
    {
        // Bit-for-bit reinterpretation of an integer as a float; all
        // sizeof(float) bytes are copied, no numeric conversion takes place.
        const auto bitsAsFloat = [](const uint32_t &bits)
        {
            float reinterpreted;
            std::memcpy(&reinterpreted, &bits, sizeof(float));
            return reinterpreted;
        };

        for (int idx = 0; idx != 1024; ++idx)
        {
            const float numerator = bitsAsFloat(val1[idx]);
            const float denominator = bitsAsFloat(val2[idx]);

            // The quotient is an ordinary float, so the compiler-generated
            // float->integer conversion is the single conversion left in the
            // loop. The author did not know a way to "de-normalize" the
            // quotient back into raw integer bits, which would remove even
            // that conversion.
            val3[idx] = numerator / denominator;
        }
    }

    // "Magic" division, double-precision variant: the operands are integers
    // held in 64-bit storage whose raw bits are copied straight into doubles,
    // so small values sit in the mantissa field (i.e. as denormals). The two
    // doubles are divided and only the quotient goes through a
    // double->integer conversion when it is stored.
    //   val1 - dividends (1024 elements are read)
    //   val2 - divisors  (1024 elements are read)
    //   val3 - receives the 1024 converted quotients
    // Author's notes, kept as stated:
    //   - intended for 32-bit integer values kept in 64-bit storage; the
    //     author asks whether 53 mantissa bits are enough
    //   - do not build with -ffast-math: it flushes these denormals to zero
    //   - "fp rounding mode: truncation" is required
    //   - do not divide by zero
    //   - reported as roughly a 10x speedup on the Zen2 architecture
    // The code assumes double has the same size as uint64_t (8 bytes).
    void intMagicTestDouble(uint64_t *  const __restrict__  val1, uint64_t *  const __restrict__  val2, uint64_t *  const __restrict__  val3)
    {
        // Bit-for-bit reinterpretation of an integer as a double; all
        // sizeof(double) bytes are copied, no numeric conversion takes place.
        const auto bitsAsDouble = [](const uint64_t &bits)
        {
            double reinterpreted;
            std::memcpy(&reinterpreted, &bits, sizeof(double));
            return reinterpreted;
        };

        for (int idx = 0; idx != 1024; ++idx)
        {
            const double numerator = bitsAsDouble(val1[idx]);
            const double denominator = bitsAsDouble(val2[idx]);

            // The quotient is an ordinary double, so the compiler-generated
            // double->integer conversion is the single conversion left in the
            // loop. The author did not know a way to "de-normalize" the
            // quotient back into raw integer bits, which would remove even
            // that conversion.
            val3[idx] = numerator / denominator;
        }
    }

    // "Magic" division on 32-bit operands via doubles: each uint32_t is first
    // widened into a temporary uint64_t, whose raw bits are then copied
    // straight into a double, so the value sits in the mantissa field (i.e. as
    // a denormal). The two doubles are divided and only the quotient goes
    // through a double->integer conversion when it is stored.
    //   val1 - dividends (1024 elements are read)
    //   val2 - divisors  (1024 elements are read)
    //   val3 - receives the 1024 converted quotients
    // Author's notes, kept as stated:
    //   - the author asks whether 53 mantissa bits are enough
    //   - do not build with -ffast-math: it flushes these denormals to zero
    //   - "fp rounding mode: truncation" is required
    //   - do not divide by zero
    //   - reported as roughly a 10x speedup on the Zen2 architecture
    // The code assumes double has the same size as uint64_t (8 bytes).
    void intMagicTestDoubleTmp(uint32_t *  const __restrict__  val1, uint32_t *  const __restrict__  val2, uint32_t *  const __restrict__  val3)
    {
        // Takes the operand already widened to 64 bits (the by-value parameter
        // plays the role of the temporary) and reinterprets those bits as a
        // double; all sizeof(double) bytes are copied.
        const auto widenedBitsAsDouble = [](uint64_t widened)
        {
            double reinterpreted;
            std::memcpy(&reinterpreted, &widened, sizeof(double));
            return reinterpreted;
        };

        for (int idx = 0; idx != 1024; ++idx)
        {
            const double numerator = widenedBitsAsDouble(val1[idx]);
            const double denominator = widenedBitsAsDouble(val2[idx]);

            // The quotient is an ordinary double, so the compiler-generated
            // double->integer conversion is the single conversion left in the
            // loop. The author did not know a way to "de-normalize" the
            // quotient back into raw integer bits, which would remove even
            // that conversion.
            val3[idx] = numerator / denominator;
        }
    }
    #include <iostream>
    #include <cpuid.h>  // GCC-provided
    // Benchmark driver. Fills the operand arrays, runs each division kernel
    // once between two readTSC() samples, prints the first 10 results of the
    // kernel that just ran, then prints the CPU brand string obtained via
    // CPUID followed by the measured TSC deltas.
    // Always returns 0.
    int main()
    {
        // 32-bit operands. The divisors b[i] = 1 + i are never zero.
        // clock() is mixed into the dividends; NOTE(review): presumably so the
        // inputs are not compile-time constants, and the fill is repeated 1000
        // times presumably as a warm-up - confirm. Depending on the value
        // clock() returns, a[i] may not fit in the 23 bits intMagicTest
        // documents as safe.
        uint32_t a[1024],b[1024],c[1024];
        for(int k=0;k<1000;k++)
        for(int i=0;i<1024;i++)
        {
            a[i]=1+i*i+clock(); b[i]=1+i;
        }

        // 64-bit operands for the 64-bit kernels.
        uint64_t a64[1024],b64[1024],c64[1024];
        for(int i=0;i<1024;i++)
        {
            a64[i]=1+i*i; b64[i]=1+i;
        }

        // Prints the first 10 entries of a result array, one per line.
        const auto printPreview = [](const auto *results)
        {
            for(int i=0;i<10;i++)
                std::cout<<results[i]<<" "<<'\n';
        };

        // Section labels keep std::endl so each one is flushed before the
        // kernel that follows starts running.
        std::cout<<"emulation:"<<std::endl;
        const auto emulationStart = readTSC();
        intEmulationTest(a,b,c);
        const auto emulationStop = readTSC();
        printPreview(c);

        std::cout<<"magic:"<<std::endl;
        const auto magicStart = readTSC();
        intMagicTest(a,b,c);
        const auto magicStop = readTSC();
        printPreview(c);

        // Note: the section labelled "int" measures the 64-bit kernel.
        std::cout<<"int:"<<std::endl;
        const auto intStart = readTSC();
        int64Test(a64,b64,c64);
        const auto intStop = readTSC();
        printPreview(c64); // results of the 64-bit kernel live in c64, not c

        std::cout<<"magic double:"<<std::endl;
        const auto magicDoubleStart = readTSC();
        intMagicTestDouble(a64,b64,c64);
        const auto magicDoubleStop = readTSC();
        printPreview(c64); // results of the 64-bit kernel live in c64, not c

        std::cout<<"magic double tmp:"<<std::endl;
        const auto magicDoubleTmpStart = readTSC();
        intMagicTestDoubleTmp(a,b,c);
        const auto magicDoubleTmpStop = readTSC();
        printPreview(c);

        // CPU brand string: CPUID leaf 0x80000000 reports the highest extended
        // leaf in EAX; leaves 0x80000002..0x80000004 each supply 16 bytes of
        // the string. The buffer is zeroed first, so it stays NUL-terminated
        // (and empty if those leaves are not reported).
        char CPUBrandString[0x40];
        unsigned int CPUInfo[4] = {0,0,0,0};

        __cpuid(0x80000000, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]);
        unsigned int nExIds = CPUInfo[0];

        std::memset(CPUBrandString, 0, sizeof(CPUBrandString));

        for (unsigned int i = 0x80000000; i <= nExIds; ++i)
        {
            __cpuid(i, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]);

            if (i == 0x80000002)
                std::memcpy(CPUBrandString, CPUInfo, sizeof(CPUInfo));
            else if (i == 0x80000003)
                std::memcpy(CPUBrandString + 16, CPUInfo, sizeof(CPUInfo));
            else if (i == 0x80000004)
                std::memcpy(CPUBrandString + 32, CPUInfo, sizeof(CPUInfo));
        }

        std::cout << "CPU Type: " << CPUBrandString << std::endl;

        // Elapsed TSC ticks per kernel (a single un-serialized run each).
        std::cout<<"emulation: "<<emulationStop-emulationStart<<" cycles"<<std::endl;
        std::cout<<"magic: "<<magicStop-magicStart<<" cycles"<<std::endl;
        std::cout<<"int: "<<intStop-intStart<<" cycles"<<std::endl;
        std::cout<<"magic double: "<<magicDoubleStop-magicDoubleStart<<" cycles"<<std::endl;
        std::cout<<"magic double tmp: "<<magicDoubleTmpStop-magicDoubleTmpStart<<" cycles"<<std::endl;
        return 0;
    }

created 3 years ago

C++ Online Compiler

Write, Run & Share C++ code online using OneCompiler's C++ online compiler for free. It is a robust, feature-rich online compiler for the C++ language, running on version C++17. Getting started with OneCompiler's C++ compiler is simple and fast. The editor shows sample boilerplate code when you choose C++ as the language and start coding!

Read inputs from stdin

OneCompiler's C++ online compiler supports stdin, and users can give inputs to programs using the STDIN textbox under the I/O tab. The following sample program takes a name as input and prints it after a "Hello" greeting.

#include <iostream>
#include <string>
using namespace std;

// Sample program: prompts for a name on stdout, reads one full line from
// stdin, and prints a greeting that contains it.
int main()
{
    std::string name;
    std::cout << "Enter name:";
    std::getline(std::cin, name); // reads the whole line, spaces included
    std::cout << "Hello " << name;
    return 0;
}

About C++

C++ is a widely used middle-level programming language.

Supports different platforms like Windows, various Linux flavours, MacOS etc
C++ supports OOPS concepts like Inheritance, Polymorphism, Encapsulation and Abstraction.
Case-sensitive
C++ is a compiler based language
C++ supports structured programming.
C++ provides a lot of built-in functions and also supports dynamic memory allocation.
Like C, C++ also allows you to play with memory using Pointers.

Syntax help

Loops

1. If-Else:

Whenever you want to perform a set of operations based on a condition, If-Else is used.

if(conditional-expression) {
   //code
}
else {
   //code
}

You can also use if-else for nested Ifs and If-Else-If ladder when multiple conditions are to be performed on a single variable.

2. Switch:

Switch is an alternative to If-Else-If ladder.

switch(conditional-expression){    
case value1:    
 // code    
 break;  // optional  
case value2:    
 // code    
 break;  // optional  
......    
    
default:     
 code to be executed when all the above cases are not matched;    
}

3. For:

For loop is used to iterate a set of statements based on a condition.

for(Initialization; Condition; Increment/decrement){  
  //code  
}

4. While:

While is also used to iterate a set of statements based on a condition. Usually while is preferred when the number of iterations is not known in advance.

while (condition) {  
// code 
}

5. Do-While:

Do-while is also used to iterate a set of statements based on a condition. It is mostly used when you need to execute the statements at least once.

do {  
 // code 
} while (condition);

Functions

A function is a subroutine that contains a set of statements. Functions are usually written when the same set of statements needs to be called multiple times, which increases reusability and modularity. A function runs only when it is called.

How to declare a Function:

return_type function_name(parameters);

How to call a Function:

function_name (parameters)

How to define a Function:

return_type function_name(parameters) {  
 // code
}