DPA-08 : To apply the use of an Autoencoder for feature optimization.
import numpy as np
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from sklearn.preprocessing import minmax_scale # Used for scaling
from sklearn.preprocessing import LabelEncoder # For label encoding
data = sns.load_dataset('iris')
X = data.drop('species', axis=1).values
y = data['species'].values
X_scaled = minmax_scale(X)  # scale all features into [0, 1] to match the decoder's sigmoid output
# Sequential 80/20 split. Note: the Iris rows are ordered by species, so this
# split leaves only one species in the test portion (an optional stratified
# alternative is sketched after the label-encoding step below).
train_size = int(0.8 * X.shape[0])
X_train, X_test = X_scaled[:train_size], X_scaled[train_size:]
y_train, y_test = y[:train_size], y[train_size:]
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)
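An optional alternative to the sequential split above, not part of the original listing, is a stratified shuffled split via scikit-learn's train_test_split, which keeps all three species in both the training and test portions; the variable names below simply mirror the ones already in use.

from sklearn.model_selection import train_test_split

# Optional alternative: stratified split keeps every species in both sets.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, train_size=0.8, stratify=y, random_state=42
)
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)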
class Autoencoder(nn.Module):
    def __init__(self, input_dim, encoding_dim):
        super(Autoencoder, self).__init__()
        # Encoder: compress the input features into a lower-dimensional code
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, encoding_dim),
            nn.ReLU(True)
        )
        # Decoder: reconstruct the original features from the code
        self.decoder = nn.Sequential(
            nn.Linear(encoding_dim, input_dim),
            nn.Sigmoid()  # sigmoid keeps reconstructions in [0, 1], matching the min-max scaled inputs
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
input_dim = X_train.shape[1]  # 4 original features
encoding_dim = 2              # compress down to 2 optimized features
model = Autoencoder(input_dim=input_dim, encoding_dim=encoding_dim)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
epochs = 50
batch_size = 10
for epoch in range(epochs):
    model.train()
    # Mini-batch training: the autoencoder learns to reconstruct its own input
    for i in range(0, len(X_train_tensor), batch_size):
        batch_data = X_train_tensor[i:i + batch_size]
        output = model(batch_data)
        loss = criterion(output, batch_data)  # reconstruction loss against the input itself
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')
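For reference, the manual batch slicing above can be written equivalently with PyTorch's DataLoader, which also shuffles the samples each epoch; this is an optional rewrite of the same loop, not part of the original listing.

from torch.utils.data import DataLoader, TensorDataset

# Optional equivalent: DataLoader handles batching and per-epoch shuffling.
train_loader = DataLoader(TensorDataset(X_train_tensor), batch_size=batch_size, shuffle=True)
for epoch in range(epochs):
    model.train()
    for (batch_data,) in train_loader:
        output = model(batch_data)
        loss = criterion(output, batch_data)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()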
model.eval()
with torch.no_grad():
    # Keep only the encoder half to obtain the compressed (optimized) features
    X_train_encoded = model.encoder(X_train_tensor).numpy()
    X_test_encoded = model.encoder(X_test_tensor).numpy()
print("Original feature shape:", X_train.shape)
print("Optimized feature shape:", X_train_encoded.shape)
plt.figure(figsize=(8, 6))
plt.scatter(X_train_encoded[:, 0], X_train_encoded[:, 1], c=y_train_encoded,
cmap='viridis')
plt.xlabel('Optimized Feature 1')
plt.ylabel('Optimized Feature 2')
plt.title('Optimized Features using Autoencoder')
plt.colorbar(label='Class Label')
plt.show()
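As a quick check of how much information the 2-dimensional code retains, the reconstruction error on the held-out test set can also be reported; this is a small supplementary sketch, not part of the original listing.

# Supplementary check: mean squared reconstruction error on the test set.
model.eval()
with torch.no_grad():
    X_test_reconstructed = model(X_test_tensor)
    test_loss = criterion(X_test_reconstructed, X_test_tensor)
print(f'Test reconstruction MSE: {test_loss.item():.4f}')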