"""Sweep the depth of a decision tree that predicts the ``fail`` column.

The script loads a Feather/Arrow table, one-hot encodes every object-typed
column, makes a stratified 60/20/20 train/validation/test split, and then
fits one entropy-based ``DecisionTreeClassifier`` per ``max_depth`` in 1..39,
recording validation accuracy and test-set F1 for each depth.
"""

import math
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pyarrow.feather as pf
import seaborn as sns
import sklearn.metrics as metrics
from sklearn.datasets import make_blobs
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.model_selection import ParameterGrid
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import plot_tree

# Single seed shared by both data splits and by every fitted tree, so that a
# re-run reproduces the same scores.
random_seed = 35

# --- Load data ---------------------------------------------------------------
arrtab = pf.read_table("./data_1.arr")
df = arrtab.to_pandas(timestamp_as_object=True)

# 'lotWaferDie' is split on '-' or '_' into three string columns L, W and D
# (presumably lot / wafer / die identifiers -- confirm against the data).
# The combined column is dropped afterwards.
df[['L', 'W', 'D']] = df['lotWaferDie'].str.split('-|_', expand=True)
df = df.drop(columns=['lotWaferDie'])

# Optional experiments that are currently disabled:
#df = df.drop(columns=['DefArea'])
#df = df.drop(columns=['xidx'])
#df = df.drop(columns=['yidx'])

# Alternative to one-hot encoding (disabled):
#from sklearn.preprocessing import LabelEncoder
#label_encoder = LabelEncoder()
#df['lotWaferDie'] = label_encoder.fit_transform(df['lotWaferDie'])

# --- One-hot encode every object-typed column --------------------------------
# The column list is evaluated once, before `df` is rebound inside the loop.
# Each source column is replaced by dummy columns prefixed with 'd_<column>'.
for col in df.select_dtypes(object).columns:
    df = pd.concat(
        [
            df.drop(col, axis=1),
            pd.get_dummies(df[col], prefix=('d_' + col)),
        ],
        axis=1,
    )

# --- Train / validation / test split (60 % / 20 % / 20 %) --------------------
# First hold out 40 % of the rows, stratified on the target ...
Xtrain, Xrest, ytrain, yrest = train_test_split(
    df.drop(columns=['fail']),
    df['fail'],
    test_size=0.4,
    random_state=random_seed,
    stratify=df['fail'],
)
# ... then cut the held-out part in half: the first half becomes the test
# set, the second half the validation set.
Xtest, Xval, ytest, yval = train_test_split(
    Xrest,
    yrest,
    test_size=0.5,
    random_state=random_seed,
    stratify=yrest,
)

# --- Hyper-parameter sweep ---------------------------------------------------
param_grid = {
    'max_depth': range(1, 40),
    'criterion': ['entropy'],
}
param_comb = ParameterGrid(param_grid)

val_acc = []          # validation accuracy, one entry per parameter set
param_f1_pairs = []   # (params, test-set F1) tuples, one per parameter set

for i, params in enumerate(param_comb):
    # random_state makes each fit deterministic: scikit-learn permutes the
    # features at every split, so an unseeded tree can differ between runs.
    dt = DecisionTreeClassifier(
        max_depth=params['max_depth'],
        criterion=params['criterion'],
        random_state=random_seed,
    )
    dt.fit(Xtrain, ytrain)

    # Predict on the validation set once and reuse the score for both the
    # history list and the progress message.
    val_score = accuracy_score(yval, dt.predict(Xval))
    val_acc.append(val_score)
    print(f"Iteration {i+1}/{len(param_comb)} - Validation Score: {val_score:.4f} - Parameters: {params}")

    # NOTE(review): F1 is measured on the *test* split for every candidate.
    # Pick the final depth from the validation scores only; choosing it from
    # these test-set numbers would leak the test set into model selection.
    predicted_classes = dt.predict(Xtest)
    f1 = f1_score(ytest, predicted_classes)
    param_f1_pairs.append((params, f1))  # store the parameters with their F1 score
    print("F1 skóre:", f1)