Sabtu, 10 Mei 2025

Decision Tree Classification on Imbalanced Data

from sklearn.tree import DecisionTreeClassifier
from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler
from sklearn.datasets import make_classification
from collections import Counter

# Demo: rebalance an imbalanced binary dataset with random under- and
# over-sampling, then fit a decision tree on the rebalanced data.

# Build an imbalanced example dataset (about 90% class 0, 10% class 1).
# n_samples is passed explicitly: make_classification defaults to 100 samples,
# which would give roughly 90/10 rows instead of the 900/100 described below.
X, y = make_classification(
    n_samples=1000,
    n_classes=2,
    weights=[0.9, 0.1],
    random_state=42,
)

# Class distribution before resampling.
# Counts are approximate (about {0: 900, 1: 100}) because make_classification
# randomly reassigns a small fraction of labels by default (flip_y).
print("Sebelum resampling:", Counter(y))

# Random undersampling: with the default sampling strategy the majority class
# is randomly reduced to the size of the minority class (about 100 each).
rus = RandomUnderSampler(random_state=42)
X_under, y_under = rus.fit_resample(X, y)
print("Setelah undersampling:", Counter(y_under))

# Random oversampling: with the default sampling strategy the minority class
# is randomly duplicated up to the size of the majority class (about 900 each).
ros = RandomOverSampler(random_state=42)
X_over, y_over = ros.fit_resample(X, y)
print("Setelah oversampling:", Counter(y_over))

# Train the decision tree on the undersampled data; X_over / y_over can be
# used instead.
# NOTE(review): if a held-out evaluation is added later, resample only the
# training split so duplicated rows cannot leak into the test set.
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_under, y_under)