from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from imblearn.over_sampling import RandomOverSampler
from imblearn.pipeline import Pipeline
import numpy as np
def _take_rows(data, indices):
    """Return the rows of *data* located at the positional *indices*."""
    # On pandas objects plain ``[]`` is label-based (and column-based for a
    # DataFrame), so positional fold indices must go through ``.iloc``.
    if hasattr(data, "iloc"):
        return data.iloc[indices]
    # Python sequences do not accept an index array; go through NumPy.
    if isinstance(data, (list, tuple)):
        return np.asarray(data)[indices]
    return data[indices]


def train_and_evaluate_with_sampling(X, y, model, oversampler, n_splits=5, random_state=42):
    """Cross-validate *model*, oversampling only the training part of each fold.

    The data is split with a shuffled ``StratifiedKFold``. In every fold the
    training rows are passed through ``oversampler.fit_resample``, the model
    is fitted on the resampled rows, and predictions are scored on the
    untouched test rows.

    Args:
        X: Feature rows; a NumPy array, pandas DataFrame, or list/tuple.
        y: Target labels aligned with ``X``; array, pandas Series, or
            list/tuple.
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``. It is
            fitted in place once per fold, so after the call it holds the
            fit from the last fold.
        oversampler: Sampler exposing ``fit_resample(X, y)``.
        n_splits: Number of folds passed to ``StratifiedKFold``.
        random_state: Seed passed to ``StratifiedKFold``.

    Returns:
        dict with the per-fold averages under the keys ``'mean_accuracy'``,
        ``'mean_f1_macro'``, ``'mean_precision_macro'`` and
        ``'mean_recall_macro'``.
    """
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=random_state)

    # One list of per-fold scores for each reported metric.
    fold_scores = {
        'mean_accuracy': [],
        'mean_f1_macro': [],
        'mean_precision_macro': [],
        'mean_recall_macro': [],
    }

    for train_index, test_index in skf.split(X, y):
        X_train = _take_rows(X, train_index)
        y_train = _take_rows(y, train_index)
        X_test = _take_rows(X, test_index)
        y_test = _take_rows(y, test_index)

        # Oversample the training rows only; the test rows keep their
        # original class distribution so the scores are not inflated.
        X_train_sampled, y_train_sampled = oversampler.fit_resample(X_train, y_train)

        model.fit(X_train_sampled, y_train_sampled)
        y_pred = model.predict(X_test)

        fold_scores['mean_accuracy'].append(accuracy_score(y_test, y_pred))
        fold_scores['mean_f1_macro'].append(f1_score(y_test, y_pred, average='macro'))
        fold_scores['mean_precision_macro'].append(
            precision_score(y_test, y_pred, average='macro')
        )
        fold_scores['mean_recall_macro'].append(
            recall_score(y_test, y_pred, average='macro')
        )

    # Average every metric over the folds.
    return {name: np.mean(values) for name, values in fold_scores.items()}
# Example usage: random oversampling (ROS).
# Assumes X and y have already been defined earlier.
dt_model = DecisionTreeClassifier(random_state=42)
ros = RandomOverSampler(random_state=42)

# Run the cross-validated evaluation.
results = train_and_evaluate_with_sampling(
    X,
    y,
    model=dt_model,
    oversampler=ros,
)

# Print the report, one line per entry.
print(
    "Hasil Evaluasi dengan ROS:",
    f"Accuracy rata-rata: {results['mean_accuracy']:.4f}",
    f"F1 Macro rata-rata: {results['mean_f1_macro']:.4f}",
    f"Precision Macro rata-rata: {results['mean_precision_macro']:.4f}",
    f"Recall Macro rata-rata: {results['mean_recall_macro']:.4f}",
    sep="\n",
)
# Output of the run above:
# Hasil Evaluasi dengan ROS:
# Accuracy rata-rata: 0.9910
# F1 Macro rata-rata: 0.7680
# Precision Macro rata-rata: 0.7759
# Recall Macro rata-rata: 0.7628