// 1. Import document loaders for different file formats
import { DirectoryLoader } from "langchain/document_loaders/fs/directory";
import { JSONLoader } from "langchain/document_loaders/fs/json";
import { TextLoader } from "langchain/document_loaders/fs/text";
import { CSVLoader } from "langchain/document_loaders/fs/csv";
import { PDFLoader } from "langchain/document_loaders/fs/pdf";

// 2. Import OpenAI language model and other related modules
import { OpenAI } from "langchain/llms/openai";
import { RetrievalQAChain } from "langchain/chains";
import { HNSWLib } from "langchain/vectorstores/hnswlib";
import { OpenAIEmbeddings } from "langchain/embeddings/openai";
import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";

// 3. Import Tiktoken for token counting
import { Tiktoken } from "@dqbd/tiktoken/lite";
import { load } from "@dqbd/tiktoken/load";
import registry from "@dqbd/tiktoken/registry.json" assert { type: "json" };
import models from "@dqbd/tiktoken/model_to_encoding.json" assert { type: "json" };

// 4. Import dotenv for loading environment variables and fs for file system operations
import dotenv from "dotenv";
import fs from "fs";
dotenv.config();

// 5. Initialize the document loader with supported file formats
const loader = new DirectoryLoader("./documents", {
  ".json": (path) => new JSONLoader(path),
  ".txt": (path) => new TextLoader(path),
  ".csv": (path) => new CSVLoader(path),
  ".pdf": (path) => new PDFLoader(path),
});

// 6. Load documents from the specified directory
console.log("Loading docs...");
const docs = await loader.load();
console.log("Docs loaded.");

// 7. Define a function to calculate the cost of tokenizing the documents
async function calculateCost() {
  const modelName = "text-embedding-ada-002";
  const modelKey = models[modelName];
  const model = await load(registry[modelKey]);
  const encoder = new Tiktoken(
    model.bpe_ranks,
    model.special_tokens,
    model.pat_str
  );
  const tokens = encoder.encode(JSON.stringify(docs));
  const tokenCount = tokens.length;
  const ratePerThousandTokens = 0.0004;
  const cost = (tokenCount / 1000) * ratePerThousandTokens;
  encoder.free();
  return cost;
}

const VECTOR_STORE_PATH = "Documents.index";
const question = "Tell me about these docs";

// 8. Define a function to normalize the content of the documents
function normalizeDocuments(docs) {
  return docs.map((doc) => {
    if (typeof doc.pageContent === "string") {
      return doc.pageContent;
    } else if (Array.isArray(doc.pageContent)) {
      return doc.pageContent.join("\n");
    }
  });
}

// 9. Define the main function to run the entire process
export const run = async () => {
  // 10. Calculate the cost of tokenizing the documents
  console.log("Calculating cost...");
  const cost = await calculateCost();
  console.log("Cost calculated:", cost);

  // 11. Check if the cost is within the acceptable limit
  if (cost <= 1) {
    // 12. Initialize the OpenAI language model
    const model = new OpenAI({});

    let vectorStore;

    // 13. Check if an existing vector store is available
    console.log("Checking for existing vector store...");
    if (fs.existsSync(VECTOR_STORE_PATH)) {
      // 14. Load the existing vector store
      console.log("Loading existing vector store...");
      vectorStore = await HNSWLib.load(
        VECTOR_STORE_PATH,
        new OpenAIEmbeddings()
      );
      console.log("Vector store loaded.");
    } else {
      // 15. Create a new vector store if one does not exist
      console.log("Creating new vector store...");
      const textSplitter = new RecursiveCharacterTextSplitter({
        chunkSize: 1000,
      });
      const normalizedDocs = normalizeDocuments(docs);
      const splitDocs = await textSplitter.createDocuments(normalizedDocs);

      // 16. Generate the vector store from the documents
      vectorStore = await HNSWLib.fromDocuments(
        splitDocs,
        new OpenAIEmbeddings()
      );
      // 17. Save the vector store to the specified path
      await vectorStore.save(VECTOR_STORE_PATH);

      console.log("Vector store created.");
    }

    // 18. Create a retrieval chain using the language model and vector store
    console.log("Creating retrieval chain...");
    const chain = RetrievalQAChain.fromLLM(model, vectorStore.asRetriever());

    // 19. Query the retrieval chain with the specified question
    console.log("Querying chain...");
    const res = await chain.call({ query: question });
    console.log({ res });
  } else {
    // 20. If the cost exceeds the limit, skip the embedding process
    console.log("The cost of embedding exceeds $1. Skipping embeddings.");
  }
};

// 21. Run the main function
run(); 

Javascript Online Compiler

Write, Run & Share Javascript code online using OneCompiler's JS online compiler for free. It's one of the robust, feature-rich online compilers for Javascript language. Getting started with the OneCompiler's Javascript editor is easy and fast. The editor shows sample boilerplate code when you choose language as Javascript and start coding.

About Javascript

Javascript(JS) is a object-oriented programming language which adhere to ECMA Script Standards. Javascript is required to design the behaviour of the web pages.

Key Features

  • Open-source
  • Just-in-time compiled language
  • Embedded along with HTML and makes web pages alive
  • Originally named as LiveScript.
  • Executable in both browser and server which has Javascript engines like V8(chrome), SpiderMonkey(Firefox) etc.

Syntax help

STDIN Example

var readline = require('readline');
var rl = readline.createInterface({
  input: process.stdin,
  output: process.stdout,
  terminal: false
});

rl.on('line', function(line){
    console.log("Hello, " + line);
});

variable declaration

KeywordDescriptionScope
varVar is used to declare variables(old way of declaring variables)Function or global scope
letlet is also used to declare variables(new way)Global or block Scope
constconst is used to declare const values. Once the value is assigned, it can not be modifiedGlobal or block Scope

Backtick Strings

Interpolation

let greetings = `Hello ${name}`

Multi line Strings

const msg = `
hello
world!
`

Arrays

An array is a collection of items or values.

Syntax:

let arrayName = [value1, value2,..etc];
// or
let arrayName = new Array("value1","value2",..etc);

Example:

let mobiles = ["iPhone", "Samsung", "Pixel"];

// accessing an array
console.log(mobiles[0]);

// changing an array element
mobiles[3] = "Nokia";

Arrow functions

Arrow Functions helps developers to write code in concise way, it’s introduced in ES6.
Arrow functions can be written in multiple ways. Below are couple of ways to use arrow function but it can be written in many other ways as well.

Syntax:

() => expression

Example:

const numbers = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
const squaresOfEvenNumbers = numbers.filter(ele => ele % 2 == 0)
                                    .map(ele => ele ** 2);
console.log(squaresOfEvenNumbers);

De-structuring

Arrays

let [firstName, lastName] = ['Foo', 'Bar']

Objects

let {firstName, lastName} = {
  firstName: 'Foo',
  lastName: 'Bar'
}

rest(...) operator

 const {
    title,
    firstName,
    lastName,
    ...rest
  } = record;

Spread(...) operator

//Object spread
const post = {
  ...options,
  type: "new"
}
//array spread
const users = [
  ...adminUsers,
  ...normalUsers
]

Functions

function greetings({ name = 'Foo' } = {}) { //Defaulting name to Foo
  console.log(`Hello ${name}!`);
}
 
greet() // Hello Foo
greet({ name: 'Bar' }) // Hi Bar

Loops

1. If:

IF is used to execute a block of code based on a condition.

Syntax

if(condition){
    // code
}

2. If-Else:

Else part is used to execute the block of code when the condition fails.

Syntax

if(condition){
    // code
} else {
    // code
}

3. Switch:

Switch is used to replace nested If-Else statements.

Syntax

switch(condition){
    case 'value1' :
        //code
        [break;]
    case 'value2' :
        //code
        [break;]
    .......
    default :
        //code
        [break;]
}

4. For

For loop is used to iterate a set of statements based on a condition.

for(Initialization; Condition; Increment/decrement){  
//code  
} 

5. While

While is also used to iterate a set of statements based on a condition. Usually while is preferred when number of iterations are not known in advance.

while (condition) {  
  // code 
}  

6. Do-While

Do-while is also used to iterate a set of statements based on a condition. It is mostly used when you need to execute the statements atleast once.

do {  
  // code 
} while (condition); 

Classes

ES6 introduced classes along with OOPS concepts in JS. Class is similar to a function which you can think like kind of template which will get called when ever you initialize class.

Syntax:

class className {
  constructor() { ... } //Mandatory Class method
  method1() { ... }
  method2() { ... }
  ...
}

Example:

class Mobile {
  constructor(model) {
    this.name = model;
  }
}

mbl = new Mobile("iPhone");