import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import math

import random
from sklearn.datasets import make_blobs

from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import plot_tree
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay

from sklearn.model_selection import ParameterGrid
random_seed = 35  # fixed seed, passed as random_state so that runs are repeatable

from sklearn.metrics import accuracy_score
import sklearn.metrics as metrics

import pyarrow.feather as pf

# Read the Feather/Arrow file from disk and convert it to a pandas DataFrame.
# timestamp_as_object=True asks pyarrow to hand timestamps back as Python
# datetime objects instead of pandas timestamps.
arrtab = pf.read_table("./data_1.arr")
df = arrtab.to_pandas(timestamp_as_object=True)

# Take the combined 'lotWaferDie' identifier out of the frame and split it on
# '-' or '_' into three separate columns (presumably lot / wafer / die --
# confirm against the data). The assignment requires exactly three parts.
df[['L', 'W', 'D']] = df.pop('lotWaferDie').str.split(pat='-|_', expand=True)

#df = df.drop(columns=['DefArea'])
#df = df.drop(columns=['xidx'])
#df = df.drop(columns=['yidx'])

#from sklearn.preprocessing import LabelEncoder
#label_encoder = LabelEncoder()
#df['lotWaferDie'] = label_encoder.fit_transform(df['lotWaferDie'])

# One-hot encode every object-dtype column: each such column is removed and
# replaced by indicator columns named 'd_<column>_<value>', appended after the
# remaining columns in the original column order.
# NOTE(review): this encodes *all* object columns, which would include any
# timestamp columns held as Python objects and the 'fail' label if it were
# object-typed -- confirm that is intended.
object_cols = list(df.select_dtypes(object).columns)
dummy_frames = [
    pd.get_dummies(df[col], prefix=('d_' + col)) for col in object_cols
]
# One drop and one concat instead of one per column, so the whole frame is
# not re-copied for every encoded column.
df = pd.concat([df.drop(columns=object_cols), *dummy_frames], axis=1)
    
    
# Train/Validation/Test split
from sklearn.model_selection import train_test_split

# 
# First cut: hold out 40 % of the rows, stratified on the 'fail' label;
# the remaining 60 % form the training set.
Xtrain, Xrest, ytrain, yrest = train_test_split(
    df.drop('fail', axis=1),
    df['fail'],
    test_size=0.4,
    stratify=df['fail'],
    random_state=random_seed,
)

# Second cut: halve the held-out rows into a test set and a validation set
# (roughly 20 % of the data each), again stratified on the label.
Xtest, Xval, ytest, yval = train_test_split(
    Xrest,
    yrest,
    test_size=0.5,
    stratify=yrest,
    random_state=random_seed,
)

# Hyper-parameter grid: entropy-criterion decision trees with max_depth 1..39.
param_grid = {
    'max_depth': range(1, 40),
    'criterion': ['entropy']
}
param_comb = ParameterGrid(param_grid)

from sklearn.metrics import f1_score
val_acc = []         # validation accuracy, one entry per parameter combination
param_f1_pairs = []  # (params, test-set F1) tuples, in the same order as val_acc

# Fit one tree per parameter combination on the training split and report its
# validation accuracy and its F1 score on the test split.
for i, params in enumerate(param_comb):
    # Seed the tree as well as the splits: scikit-learn breaks ties between
    # equally good splits at random, so without random_state the scores can
    # differ from run to run on identical data.
    dt = DecisionTreeClassifier(
        max_depth=params['max_depth'],
        criterion=params['criterion'],
        random_state=random_seed,
    )
    dt.fit(Xtrain, ytrain)

    # Predict and score the validation split once; the same value is stored
    # and printed.
    val_score = accuracy_score(yval, dt.predict(Xval))
    val_acc.append(val_score)
    print(f"Iteration {i+1}/{len(param_comb)} - Validation Score: {val_score:.4f} - Parameters: {params}")

    # NOTE(review): F1 is measured on the *test* split for every candidate.
    # If these values are used to choose max_depth, the test set no longer
    # gives an unbiased estimate -- consider selecting on the validation split.
    # f1_score is called with its default averaging, which presumably relies
    # on 'fail' being a binary label -- confirm.
    predicted_classes = dt.predict(Xtest)
    f1 = f1_score(ytest, predicted_classes)
    param_f1_pairs.append((params, f1))  # store the parameters with their F1 score
    print("F1 skóre:", f1)