Sabtu, 10 Mei 2025

RandomOverSampler Imbalanced Classification Python

from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from imblearn.over_sampling import RandomOverSampler
from imblearn.pipeline import Pipeline
import numpy as np

def _take_rows(data, indices):
    """Return the rows of *data* at the given integer positions."""
    if hasattr(data, "iloc"):
        # pandas objects: plain ``data[indices]`` would look up columns
        # (DataFrame) or index labels (Series) instead of row positions.
        return data.iloc[indices]
    return data[indices]


def train_and_evaluate_with_sampling(X, y, model, oversampler, n_splits=5, random_state=42):
    """Cross-validate *model* with oversampling applied to the training folds only.

    The data is split with a shuffled ``StratifiedKFold``. For every fold the
    training part is resampled with ``oversampler.fit_resample``, *model* is
    fitted on the resampled data, and predictions on the untouched test part
    are scored.

    Args:
        X: Feature matrix. NumPy arrays, pandas DataFrames, and plain
            lists/tuples of rows are accepted.
        y: Target labels, aligned with the rows of ``X``. NumPy arrays,
            pandas Series, and plain lists/tuples are accepted.
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``. The same
            object is refitted on every fold, so after the call it holds the
            state produced by the last ``fit``.
        oversampler: Sampler exposing ``fit_resample(X, y)``.
        n_splits: Number of folds passed to ``StratifiedKFold``.
        random_state: Seed passed to ``StratifiedKFold`` for the shuffle.

    Returns:
        dict with the per-fold averages under the keys ``'mean_accuracy'``,
        ``'mean_f1_macro'``, ``'mean_precision_macro'`` and
        ``'mean_recall_macro'`` (F1, precision and recall are macro-averaged).
    """
    # Plain Python sequences cannot be indexed with an index array; convert
    # them once up front instead of on every fold.
    if isinstance(X, (list, tuple)):
        X = np.asarray(X)
    if isinstance(y, (list, tuple)):
        y = np.asarray(y)

    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=random_state)

    # Per-fold scores, averaged after the loop.
    acc_scores = []
    f1_scores = []
    precision_scores = []
    recall_scores = []

    for train_index, test_index in skf.split(X, y):
        X_train = _take_rows(X, train_index)
        y_train = _take_rows(y, train_index)
        X_test = _take_rows(X, test_index)
        y_test = _take_rows(y, test_index)

        # Oversample the training fold only, so the test fold keeps the
        # original class distribution and contains no duplicated samples.
        X_train_sampled, y_train_sampled = oversampler.fit_resample(X_train, y_train)

        model.fit(X_train_sampled, y_train_sampled)
        y_pred = model.predict(X_test)

        acc_scores.append(accuracy_score(y_test, y_pred))
        f1_scores.append(f1_score(y_test, y_pred, average='macro'))
        precision_scores.append(precision_score(y_test, y_pred, average='macro'))
        recall_scores.append(recall_score(y_test, y_pred, average='macro'))

    return {
        'mean_accuracy': np.mean(acc_scores),
        'mean_f1_macro': np.mean(f1_scores),
        'mean_precision_macro': np.mean(precision_scores),
        'mean_recall_macro': np.mean(recall_scores),
    }


# Example usage with random oversampling (ROS).
# X and y are assumed to have been defined earlier.
dt_model = DecisionTreeClassifier(random_state=42)
ros = RandomOverSampler(random_state=42)

# Run the cross-validated evaluation.
results = train_and_evaluate_with_sampling(
    X,
    y,
    model=dt_model,
    oversampler=ros,
)

# Report the averaged scores, one per line.
print(
    "Hasil Evaluasi dengan ROS:",
    f"Accuracy rata-rata: {results['mean_accuracy']:.4f}",
    f"F1 Macro rata-rata: {results['mean_f1_macro']:.4f}",
    f"Precision Macro rata-rata: {results['mean_precision_macro']:.4f}",
    f"Recall Macro rata-rata: {results['mean_recall_macro']:.4f}",
    sep="\n",
)

Hasil Evaluasi dengan ROS:
Accuracy rata-rata: 0.9910
F1 Macro rata-rata: 0.7680
Precision Macro rata-rata: 0.7759
Recall Macro rata-rata: 0.7628