Predicting Customer Churn with CART (Decision Trees)¶
By Sean Kim
Welcome, marketers and aspiring data scientists! Now that you have an understanding of CART models, you'll be tackling a common business challenge: customer churn. We will predict which customers might leave (“churn”) a telecom service, using a Classification and Regression Tree (CART) model. We'll explain each step in detail, invite you to try things out for yourself, and relate the results to real-world marketing actions.
Introduction:¶
Let's role-play: You're a marketer at Telco, a popular telecom provider. Recently, you've noticed a worrying trend—customers leaving your service. In the telecom world, we call this customer churn—basically, when customers decide to pack up and move on. Losing customers isn't just frustrating; it's costly. In fact, studies show it's much cheaper to keep existing customers happy than to find new ones. Unfortunately, churn rates in the telecom industry are notoriously high—often around 20-30% per year (MDPI.com).
Your mission, should you choose to accept it: find out why customers are leaving and create an action plan to keep them around.
Why CART (Decision trees)?
To solve the churn puzzle, we're using a tool called CART Decision Trees. Decision trees are popular because they're easy to understand—even if you're not a tech wizard—and they're great at handling messy, real-world data.
Think of a decision tree like playing a game of “20 questions” about your customers. Each question splits customers into different groups based on their answers:
- "Is their contract month-to-month?"
- "Do they use a lot of tech support?" "Are they high-billing customers?" At the end, you'll have clear "rules" that predict who's likely to churn. For example:
- “IF a customer has a month-to-month contract AND uses tech support heavily, THEN they're likely to churn.”
Pretty simple, right? This makes it easy for you and your team to take action.
Learning Outcomes:
Expect to walk away with practical skills that you can use immediately in marketing. You'll learn:
- Dataset Loading and Overview
- Data Cleaning and Preprocessing
- Building a CART (Decision Tree)
- Visualizing and Interpreting the CART (Decision Tree)
- Results and Marketing Insights
Step 1: Dataset Loading and Overview¶
- Dataset Loading
- Exploratory Data Analysis (EDA)
1.1 Dataset Loading¶
First, let's load the dataset into this Google Colab notebook. Download the Telco dataset from Kaggle here.
The dataset contains one row per customer, with various features (multiple columns) describing them. One of these features indicates whether a particular customer Churned (Yes/No).
# Run this cell and add the dataset to this directory
from google.colab import files
uploaded = files.upload()
Saving TelcoChurnPrediction.csv to TelcoChurnPrediction.csv
1.2 Exploratory Data Analysis (EDA)¶
Now that we have loaded our dataset, we will perform a brief, minimal exploratory data analysis (EDA). Our EDA will focus on understanding the characteristics of the dataset's features (e.g., types and number of variables, missing values, target variable). It will be minimal, so if you'd like an in-depth guide on EDA, check out this guide by Analytics Vidhya.
Concise summary of EDA
Exploratory data analysis (EDA) is the process of examining and summarizing a dataset to understand its main characteristics. By using statistical graphics and other visualization methods, EDA helps us:
- Understand data distribution: Identify the shape (normal, skewed, etc.) of numeric variables.
- Detect outliers: Find values that differ significantly from the rest
- Identify missing data: Check for null or missing values
- Uncover Relationships: Visualize potential relationships between variables (using scatter plots, correlation matrices)
What are we looking for?
In our case of predicting churn rate we're looking for:
- The overall structure and types of data available
- The distribution of key numeric variables like 'MonthlyCharges', 'tenure', and 'TotalCharges'
- How features might relate to customer churn ('Churn' vs. 'tenure')
- Whether there are outliers or anomalies
# Load the dataset
import pandas as pd
data = pd.read_csv("TelcoChurnPrediction.csv")
print("Dataset loaded successfully")
# Taking a look at the first few rows
data.head(5)
Dataset loaded successfully
| customerID | gender | SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | InternetService | OnlineSecurity | ... | DeviceProtection | TechSupport | StreamingTV | StreamingMovies | Contract | PaperlessBilling | PaymentMethod | MonthlyCharges | TotalCharges | Churn | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 7590-VHVEG | Female | 0 | Yes | No | 1 | No | No phone service | DSL | No | ... | No | No | No | No | Month-to-month | Yes | Electronic check | 29.85 | 29.85 | No |
| 1 | 5575-GNVDE | Male | 0 | No | No | 34 | Yes | No | DSL | Yes | ... | Yes | No | No | No | One year | No | Mailed check | 56.95 | 1889.5 | No |
| 2 | 3668-QPYBK | Male | 0 | No | No | 2 | Yes | No | DSL | Yes | ... | No | No | No | No | Month-to-month | Yes | Mailed check | 53.85 | 108.15 | Yes |
| 3 | 7795-CFOCW | Male | 0 | No | No | 45 | No | No phone service | DSL | Yes | ... | Yes | Yes | No | No | One year | No | Bank transfer (automatic) | 42.30 | 1840.75 | No |
| 4 | 9237-HQITU | Female | 0 | No | No | 2 | Yes | No | Fiber optic | No | ... | No | No | No | No | Month-to-month | Yes | Electronic check | 70.70 | 151.65 | Yes |
5 rows × 21 columns
From this sneak peek, we can say:
- Features: We have information like gender, whether the customer is a SeniorCitizen, whether they have a Partner, how long they've been with us (tenure, in months), what services they use (phone, internet, tech support), Contract type, billing information, etc.
- Target: The Churn feature indicates whether a customer left (Yes) or stayed (No). This will be our target variable (i.e., what we want to predict).
- Types of variables: There are both categorical features (e.g., gender, InternetService, Contract) and numerical features (e.g., TotalCharges, tenure). We will have to preprocess the data.
Now, let's take some time to understand the data distribution. Let's see how many customers churned vs. stayed:
# Calculate churn distribution
churn_counts = data['Churn'].value_counts()
print(churn_counts)
print(f"Churn Rate: {churn_counts['Yes'] / len(data):.2%}")
Churn No 5174 Yes 1869 Name: count, dtype: int64 Churn Rate: 26.54%
This dataset contains information on 7043 customers, of which 1869 churned (5174 stayed), giving us a churn rate ~26.5%.
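Before we ask what this class balance implies, it's also worth a quick glance at the numeric distributions we said we'd look for. Here is a minimal, optional sketch (we plot only tenure and MonthlyCharges for now, since TotalCharges is still stored as text at this point, as we'll discover during cleaning):
# Optional: quick histograms of two key numeric features
import matplotlib.pyplot as plt
data[['tenure', 'MonthlyCharges']].hist(bins=30, figsize=(10, 4))
plt.tight_layout()
plt.show()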
Looking at the churn counts above, can you think of something that might cause a problem here, and why?
Learn more
We have an imbalance in the data: far more customers stay with the company than churn (5174 vs. 1869). This means our model needs to be careful; if it naively predicted “No” for everyone, it would be right ~73.5% of the time, but that model would be useless in practice. We’ll keep this in mind when evaluating model performance. See if you can spot the techniques we use to handle this.
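As a quick sanity check on that claim, here is a minimal sketch computing the accuracy of an "always predict No" baseline (no modeling involved, just the class proportion):
# Accuracy of naively predicting "No" (no churn) for every customer
naive_accuracy = (data['Churn'] == 'No').mean()
print(f"Always-predict-'No' accuracy: {naive_accuracy:.2%}")  # ~73.5%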
Step 2: Data Cleaning and Preprocessing¶
- Data Cleaning
- Data Preprocessing
2.1 Data Cleaning¶
Now that we know what our data looks like, we need to ensure that it is ready for modeling.
Real-world data often comes with missing values and inconsistencies. The first thing we will do is check for nulls and strange values.
# Quick data sanity checks
print(data.isnull().sum()) # check for missing values per column
print(data.nunique()) # how many unique values in each column (to spot potential ID columns or constants)
customerID 0 gender 0 SeniorCitizen 0 Partner 0 Dependents 0 tenure 0 PhoneService 0 MultipleLines 0 InternetService 0 OnlineSecurity 0 OnlineBackup 0 DeviceProtection 0 TechSupport 0 StreamingTV 0 StreamingMovies 0 Contract 0 PaperlessBilling 0 PaymentMethod 0 MonthlyCharges 0 TotalCharges 0 Churn 0 dtype: int64 customerID 7043 gender 2 SeniorCitizen 2 Partner 2 Dependents 2 tenure 73 PhoneService 2 MultipleLines 3 InternetService 3 OnlineSecurity 3 OnlineBackup 3 DeviceProtection 3 TechSupport 3 StreamingTV 3 StreamingMovies 3 Contract 3 PaperlessBilling 2 PaymentMethod 4 MonthlyCharges 1585 TotalCharges 6531 Churn 2 dtype: int64
If there are missing values or special cases, we would have to take care of them before preprocessing the data (e.g., fill with median, or drop if few).
We will also drop the customerID column since it's just an identifier, and will not be useful for prediction.
For the dataset we are using, we have 17 features that are categorical, meaning they aren't numbers that an algorithm can understand.
We need to convert them into numerical values that our CART decision tree algorithm can understand. There are multiple ways we can do this.
# Drop unnecessary columns
data = data.drop('customerID', axis=1)
Our data is now ready for preprocessing!
2.2 Data Preprocessing¶
- Converting Categorical Variables
- Separating Features and Target Variables
- Splitting Training and Testing Sets
2.2.1 Converting Categorical Variables¶
For a CART model:
We don't need to scale numeric features as trees are not distance-based (e.g., K-nearest-neighbor, support vector machines).
However, we do need to convert categorical features into numerical form (most implementations of CART can't directly handle strings). We can do this using one-hot encoding or label encoding. We will use one-hot encoding for variables that have nominal categories (gender, InternetService, Contract, etc.). For example, the feature Contract has values like "Month-to-month", "One year", and "Two year". These values will be replaced by binary columns indicating each contract type.
Let's do one-hot encoding using pandas get_dummies. When using one-hot encoding, we need to be careful of the dummy variable trap. We'll work around this by dropping one category per feature we encode.
Scikit-learn’s DecisionTreeClassifier can also accept label-encoded integers for categories, but one-hot encoding is safer to avoid any implied order.
# One-hot encode categorical features
categorical_columns = ['gender','Partner', 'Dependents', 'PhoneService',
'MultipleLines', 'InternetService', 'OnlineSecurity',
'OnlineBackup', 'DeviceProtection', 'TechSupport',
'StreamingTV', 'StreamingMovies', 'Contract',
'PaperlessBilling', 'PaymentMethod']
data_encoded = pd.get_dummies(data, columns=categorical_columns, drop_first=True)
data_encoded.head(5)
| SeniorCitizen | tenure | MonthlyCharges | TotalCharges | Churn | gender_Male | Partner_Yes | Dependents_Yes | PhoneService_Yes | MultipleLines_No phone service | ... | StreamingTV_No internet service | StreamingTV_Yes | StreamingMovies_No internet service | StreamingMovies_Yes | Contract_One year | Contract_Two year | PaperlessBilling_Yes | PaymentMethod_Credit card (automatic) | PaymentMethod_Electronic check | PaymentMethod_Mailed check | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 1 | 29.85 | 29.85 | No | False | True | False | False | True | ... | False | False | False | False | False | False | True | False | True | False |
| 1 | 0 | 34 | 56.95 | 1889.5 | No | True | False | False | True | False | ... | False | False | False | False | True | False | False | False | False | True |
| 2 | 0 | 2 | 53.85 | 108.15 | Yes | True | False | False | True | False | ... | False | False | False | False | False | False | True | False | False | True |
| 3 | 0 | 45 | 42.30 | 1840.75 | No | True | False | False | False | True | ... | False | False | False | False | True | False | False | False | False | False |
| 4 | 0 | 2 | 70.70 | 151.65 | Yes | False | False | False | True | False | ... | False | False | False | False | False | False | True | False | True | False |
5 rows × 31 columns
After encoding, we store it in a new dataframe called data_encoded. In this new dataset, each categorical feature has been converted into a set of True/False columns.
For example, InternetService, which had categories {"DSL", "Fiber optic", "No"}, is now represented by two columns: InternetService_Fiber optic and InternetService_No (DSL being the dropped category).
Don't we need the values to be numeric?
True or false values are called boolean values. In Python, boolean values are treated as 0 (False) and 1 (True) when used in numeric contexts. Many machine learning libraries (including scikit-learn) automatically read boolean values as numeric values.
However, for the target variable (Churn) we need to explicitly convert the values to 0 and 1 so that it is unambiguously numeric.
Let's check the datatypes of our new features
# Checking the datatypes of categories in our new dataset
print(data_encoded.dtypes)
SeniorCitizen int64 tenure int64 MonthlyCharges float64 TotalCharges object Churn object gender_Male bool Partner_Yes bool Dependents_Yes bool PhoneService_Yes bool MultipleLines_No phone service bool MultipleLines_Yes bool InternetService_Fiber optic bool InternetService_No bool OnlineSecurity_No internet service bool OnlineSecurity_Yes bool OnlineBackup_No internet service bool OnlineBackup_Yes bool DeviceProtection_No internet service bool DeviceProtection_Yes bool TechSupport_No internet service bool TechSupport_Yes bool StreamingTV_No internet service bool StreamingTV_Yes bool StreamingMovies_No internet service bool StreamingMovies_Yes bool Contract_One year bool Contract_Two year bool PaperlessBilling_Yes bool PaymentMethod_Credit card (automatic) bool PaymentMethod_Electronic check bool PaymentMethod_Mailed check bool dtype: object
It looks like the TotalCharges feature's datatype is object. If we look at the values in the TotalCharges column, the datatype should be float. This likely happened because the data is improperly formatted. There could be numerous reasons, but here we have an unwanted blank space (" ") where a numeric value is expected. Let's fix it.
# Removing blank space from the column
data_encoded['TotalCharges'] = data_encoded['TotalCharges'].str.strip()
# Replace empty strings with NaN and convert to numeric
data_encoded['TotalCharges'] = pd.to_numeric(data_encoded['TotalCharges'], errors='coerce')
# Check the datatypes again
print(data_encoded.dtypes)
SeniorCitizen int64 tenure int64 MonthlyCharges float64 TotalCharges float64 Churn object gender_Male bool Partner_Yes bool Dependents_Yes bool PhoneService_Yes bool MultipleLines_No phone service bool MultipleLines_Yes bool InternetService_Fiber optic bool InternetService_No bool OnlineSecurity_No internet service bool OnlineSecurity_Yes bool OnlineBackup_No internet service bool OnlineBackup_Yes bool DeviceProtection_No internet service bool DeviceProtection_Yes bool TechSupport_No internet service bool TechSupport_Yes bool StreamingTV_No internet service bool StreamingTV_Yes bool StreamingMovies_No internet service bool StreamingMovies_Yes bool Contract_One year bool Contract_Two year bool PaperlessBilling_Yes bool PaymentMethod_Credit card (automatic) bool PaymentMethod_Electronic check bool PaymentMethod_Mailed check bool dtype: object
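Note that errors='coerce' turns those blank strings into NaN rather than dropping them. Here is a quick check, with one possible fix shown only as a commented-out option (since later in this notebook we simply replace any remaining NaNs with 0):
# How many blank TotalCharges values became NaN after the coercion?
print("Missing TotalCharges values:", data_encoded['TotalCharges'].isna().sum())
# One option (an assumption, not the only choice) would be to fill them with the median:
# data_encoded['TotalCharges'] = data_encoded['TotalCharges'].fillna(data_encoded['TotalCharges'].median())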
There we go! Now we're ready to split the data into training and testing sets.
2.2.2 Separating Features and Target Variables¶
Next, we separate our features and target variable
Why separate Features and Target?
In supervised learning, the target variable is what we want the model to predict.
Sometimes the dataset includes the target variable we want to predict; other times we might want to create a new one.
We will be setting 'Churn' (1 for churned customers, 0 for those who stayed) as our target variable.
Selecting Predictors
For our model, we will use all available features as predictors, excluding the target variable itself (and, had we derived the target from other columns, any columns used to create it).
# Converting target to numerical & separating X (features) and y (target)
X = data_encoded.drop('Churn', axis=1)
y = data_encoded['Churn'].apply(lambda x: 1 if x == 'Yes' or x == 1 else 0)
2.2.3 Splitting Training and Testing Sets¶
Now, we'll split the data into training and testing sets.
We'll train our model on the training set and reserve the testing set to see how the model performs on unseen data (this simulates future customers). We'll utilize a stratified split to maintain the churn/no-churn ratio in both sets.
Stratification is a method to preserve the proportion of each class in the target variable. Since we have a churn rate of ~26%, stratifying on the Churn variable will give us a ~26% churn rate in both the training and testing sets.
# importing necessary tools
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
X, y,
test_size = 0.2,
random_state = 707,
stratify = y) # keeps class proportion same in training and testing sets
# Verify the sizes of your splits
print("Training size:", X_train.shape[0], "Testing size:", X_test.shape[0])
print("Churn rate in training:", y_train.mean(), "Churn rate in test:", y_test.mean())
Training size: 5634 Testing size: 1409 Churn rate in training: 0.2653532126375577 Churn rate in test: 0.2654364797728886
We can see that stratification worked, with churn rates roughly equal for both training and testing sets.
By splitting the data, we ensure that we can evaluate our model on data it hasn't seen before, which gives us a more realistic estimate of performance.
Run the code below and take a look at the predictors. Think about them. Does it make sense to include all of them?
Learn more
If you recall from your reading, decision trees naturally select the most informative features (we will see how this works in a bit).
Even if you believe a feature may not have any effect on the prediction, you never know what the model might uncover. It makes more sense to include all the features for the model to work its magic.
print("All features:", X.columns)
All features: Index(['SeniorCitizen', 'tenure', 'MonthlyCharges', 'TotalCharges',
'gender_Male', 'Partner_Yes', 'Dependents_Yes', 'PhoneService_Yes',
'MultipleLines_No phone service', 'MultipleLines_Yes',
'InternetService_Fiber optic', 'InternetService_No',
'OnlineSecurity_No internet service', 'OnlineSecurity_Yes',
'OnlineBackup_No internet service', 'OnlineBackup_Yes',
'DeviceProtection_No internet service', 'DeviceProtection_Yes',
'TechSupport_No internet service', 'TechSupport_Yes',
'StreamingTV_No internet service', 'StreamingTV_Yes',
'StreamingMovies_No internet service', 'StreamingMovies_Yes',
'Contract_One year', 'Contract_Two year', 'PaperlessBilling_Yes',
'PaymentMethod_Credit card (automatic)',
'PaymentMethod_Electronic check', 'PaymentMethod_Mailed check'],
dtype='object')
Step 3: Building a CART (Decision Tree)¶
- Building an Initial CART (Decision Tree)
- Evaluating Initial CART (Decision Tree)
- Tuning the CART (Decision Tree)
- Evaluating Tuned CART (Decision Tree)
3.1 Building an Initial CART (Decision Tree)¶
Now that we have finished preparing our data, let's build the decision tree. For our model, we will be using scikit‑learn’s 'DecisionTreeClassifier'.
Since our target variable 'Churn' is binary, this is a classification problem—we are predicting discrete categories (Churn vs No Churn). If we are dealing with a classification problem, which criterion should we use to guide our decision tree splits?
Answer
Gini impurity or entropy! While CART decision trees use Gini impurity by default, many implementations, like scikit-learn, also let you choose entropy as the splitting criterion.
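If you'd like to see the arithmetic, here is a tiny illustrative sketch of Gini impurity for a binary node. The function is just the textbook formula (not part of scikit-learn), and the 0.265 value echoes our dataset's overall churn rate; a good split is one whose child nodes end up with lower (weighted) impurity than the parent.
# Gini impurity of a node: 1 minus the sum of squared class proportions
def gini(p_churn):
    p_stay = 1 - p_churn
    return 1 - (p_churn ** 2 + p_stay ** 2)

print(gini(0.0))    # 0.0  -> a pure node (nobody churns)
print(gini(0.265))  # ~0.39 -> a node that mirrors our overall churn mix
print(gini(0.5))    # 0.5  -> a maximally mixed node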
For this initial CART, we'll use the default hyperparameters to see how it does. Then we'll tune it.
# Import the necessary tools
from sklearn.tree import DecisionTreeClassifier
# Building a basic CART (decision tree)
clf = DecisionTreeClassifier(random_state = 707)
clf.fit(X_train, y_train)
# Making predictions on the test set
y_pred = clf.predict(X_test)
Yup, that's it. We've now trained a basic decision tree on the training set and made predictions on the testing set. Let's see how good these predictions are.
3.2 Evaluating Initial CART (Decision Tree)¶
We'll evaluate the model using the confusion matrix and metrics such as accuracy, precision, and recall.
# Import the necessary tools
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score
# Confusion matrix
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(cm)
# Calculating evaluation metrics
baseline_accuracy = accuracy_score(y_test, y_pred)
baseline_precision = precision_score(y_test, y_pred)
baseline_recall = recall_score(y_test, y_pred)
print("Baseline Accuracy:", baseline_accuracy)
print("Baseline Precision:", baseline_precision)
print("Baseline Recall:", baseline_recall)
Confusion Matrix: [[841 194] [193 181]] Baseline Accuracy: 0.7253371185237757 Baseline Precision: 0.4826666666666667 Baseline Recall: 0.4839572192513369
Confusion matrix:
[841, 194]
[193, 181]
- The first row is for all actual "no churn" instances, i.e. how many the model correctly identified as "no churn" and how many we incorrectly predicted as "churn" (false alarms).
- The second row is for actual "Yes churn" instances, i.e. how many the model incorrectly predicted as "no churn" and how many were correctly predicted as "churn".
Hence, it can be read like this:
[ Correct No Churn Prediction | Wrong Churn Prediction ]
[ Wrong No Churn Prediction | Correct Churn Prediction ]
Evaluation Metrics Explained:
Accuracy: This metric represents the proportion of total predictions that are correct. It is calculated as (True Positives + True Negatives) divided by the total number of samples. Accuracy gives a general idea of the model’s overall performance.
Precision: Precision tells us of the cases that the model flagged as churn, how many were actually churners. A low precision would mean many false alarms. We'd be bothering a lot of customers who wouldn't have left. Not good!
Recall: Recall tells us among actual churners, how many the model caught. A lower recall means the model is missing many churners. This means many customers leaving without us even knowing. Not good again.
In other words, the precision_score is effectively “If the model predicts churn, what % of those are truly churn?” and recall_score is “What % of true churners did the model identify?”.
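To make that concrete, here is a minimal sketch that recomputes the metrics by hand from the confusion matrix printed above (the four counts are read straight off the matrix):
# Rows of the confusion matrix = actual class, columns = predicted class
tn, fp = 841, 194   # actual "no churn": correctly kept vs. false alarms
fn, tp = 193, 181   # actual "churn": missed churners vs. correctly caught

print("Accuracy: ", (tp + tn) / (tp + tn + fp + fn))  # ~0.725
print("Precision:", tp / (tp + fp))                   # ~0.483
print("Recall:   ", tp / (tp + fn))                   # ~0.484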
So, in our case we got Accuracy: 72.5%, Precision: 48.3%, Recall: 48.4%. We would interpret this as:
- Accuracy: 72.5%: The model is correct 72.5% of the time. Keep in mind that the churn rate in the dataset is ~26%, so accuracy alone is misleading.
- Precision: 48.3%: When the model says "this customer will churn," it's only correct 48.3% of the time. So, 51.7% are false alarms.
- Recall: 48.4%: The model catches 48.4% of actual churners, but 51.6% slip past undetected. We'd lose those customers if we relied on this model.
In a marketing context, recall is very important. We don't want to miss too many true churners; losing a customer is costly. We might even be willing to tolerate some false alarms (lower precision), especially if the retention campaign (e.g., sending an email or coupon) isn't too expensive or damaging to send to a non-churner.
Our initial CART was using default settings. By default, DecisionTreeClassifier in scikit-learn will grow the tree until leaves are pure or until it runs out of features. This can lead to overfitting (memorizing the data, instead of learning the patterns). We might be better off setting limits. The typical hyperparameters we tune for CART are:
- max_depth
- min_samples_split / min_samples_leaf
- criterion
- class_weight
Refer back to the text here for a reminder of what they do.
3.3 Tuning the CART (Decision Tree)¶
We'll use a systematic approach to finding the right hyperparameters. We will do this by creating a grid (GridSearchCV) of different values of the hyperparameters to test. GridSearchCV tests these different hyperparameters using a technique called cross-validation. We'll also optimize for a balance between precision and recall (either use an F1-score or ensure recall is reasonably high).
What is Cross-validation?
Cross-validation assesses model performance by splitting the data into multiple training and validation sets. The most common method is k-fold cross-validation. This method divides the dataset into k equally sized 'folds' (parts). For each iteration, one fold is used as the validation set while the remaining k-1 folds are used for training. This process is repeated k times, and the performance metric from each iteration is averaged.
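If you'd like to see cross-validation in isolation before wrapping it inside a grid search, scikit-learn's cross_val_score runs exactly this k-fold loop. A minimal sketch using our untuned tree (scoring='f1' anticipates the F1 score explained just below, and the fold scores will differ slightly from the test-set numbers above):
# 5-fold cross-validated F1 for the untuned tree, using only the training data
from sklearn.model_selection import cross_val_score
cv_scores = cross_val_score(DecisionTreeClassifier(random_state=707),
                            X_train, y_train, cv=5, scoring='f1')
print("F1 per fold:", cv_scores.round(3))
print(f"Mean F1: {cv_scores.mean():.3f}")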
What is an F1 score?
The F1 score is the harmonic mean of precision and recall. In this context, it provides a single measure that balances precision and recall. This is useful to us because customer churn datasets are often imbalanced (fewer churners than non-churners), which makes relying on accuracy alone misleading.
The F1 score gives us a more robust evaluation by making sure the model isn't just good at predicting the majority class, but the minority class as well. This means that a higher F1 score tells us the model does a better job at both catching churners and minimizing false alarms.
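Concretely, F1 = 2 × (precision × recall) / (precision + recall). Here's a quick check that plugs in our baseline numbers, reusing the variables computed earlier, with scikit-learn's f1_score shown for comparison:
# F1 is the harmonic mean of precision and recall
baseline_f1 = 2 * (baseline_precision * baseline_recall) / (baseline_precision + baseline_recall)
print(f"Baseline F1 (by hand): {baseline_f1:.3f}")   # ~0.483

from sklearn.metrics import f1_score
print(f"Baseline F1 (sklearn): {f1_score(y_test, y_pred):.3f}")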
# Importing the necessary tools
from sklearn.model_selection import GridSearchCV
# Define a grid of hyperparameters to search over
param_grid = {
'max_depth': [3, 4, 5, 6, 7],
'min_samples_split': [2, 3, 10, 20, 30],
'criterion': ['gini', 'entropy'],
'class_weight': [None, 'balanced']
}
# Set up the grid search with cross-validation
grid_clf = GridSearchCV(DecisionTreeClassifier(random_state = 707),
param_grid,
cv = 5, # 5-fold cross-validation on training set
scoring = 'f1')
# Perform GridSearchCV with cross-validation to find best hyperparameters
grid_clf.fit(X_train, y_train)
print("Best Hyperparameters:", grid_clf.best_params_)
# Retrieve the best CART model from GridSearchCV
tuned_clf = grid_clf.best_estimator_
# Use the tuned model (tuned_clf) to make predictions on the test set
y_tuned_pred = tuned_clf.predict(X_test)
Best Hyperparameters: {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 6, 'min_samples_split': 20}
Here, we used GridSearchCV to try out every combination from param_grid, using 5-fold cross-validation on the training data to evaluate F1-score. Then we took the best combination and built a new model tuned_clf.
3.4 Evaluating Tuned CART (Decision Tree)¶
# import the necessary tools
from sklearn.metrics import classification_report
# Print the evaluation metrics for the tuned model
tuned_accuracy = accuracy_score(y_test, y_tuned_pred)
tuned_precision = precision_score(y_test, y_tuned_pred)
tuned_recall = recall_score(y_test, y_tuned_pred)
print("Tuned Accuracy:", tuned_accuracy)
print("Tuned Precision:", tuned_precision)
print("Tuned Recall:", tuned_recall)
Tuned Accuracy: 0.7118523775727467 Tuned Precision: 0.4735973597359736 Tuned Recall: 0.767379679144385
Did the model do any better?
Our tuned model got an accuracy of 71.2% (down from 72.5%), a precision of 47.4% (down from 48.3%), and a recall of 76.7% (up from 48.4%):
Accuracy slightly dropped: The overall accuracy decreased by about 1.3 percentage points, mostly because we're flagging more potential churners overall, including some who won't churn. But accuracy alone isn't the best measure when the classes are imbalanced; capturing more churners is typically worth a small trade-off in accuracy.
Precision slightly dropped: Precision is down slightly, meaning the model now has a bit more "false alarms"—some customers identified as likely to churn may actually remain. However, given the high cost of losing a customer, a modest decrease in precision is acceptable if it means significantly fewer customers slipping through unnoticed.
Recall increased substantially: Our new model correctly identifies 76.7% of actual churners, up from just 48.4%. From a marketing perspective, this is extremely valuable because we're now catching many more customers who might leave, allowing for timely interventions.
Bottom Line: Hyperparameter tuning helped us build a model that's better aligned with marketing goals: significantly improving our ability to detect customers at risk (higher recall) at the modest expense of more false positives (slightly lower precision and accuracy). This aligns perfectly with practical marketing priorities—it's better to proactively engage customers unnecessarily than miss out on intervening with those truly likely to churn.
Try different hyperparameters
Modify param_grid with different or additional values to see if you can improve the model's performance.
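As one illustrative starting point (not a tuned recommendation), you might widen the search like this; more combinations mean a longer runtime:
# A wider, illustrative grid to experiment with
param_grid_v2 = {
    'max_depth': [3, 5, 7, 9, None],
    'min_samples_split': [2, 10, 30, 50],
    'min_samples_leaf': [1, 5, 10, 25],
    'criterion': ['gini', 'entropy'],
    'class_weight': [None, 'balanced'],
}
grid_v2 = GridSearchCV(DecisionTreeClassifier(random_state=707),
                       param_grid_v2, cv=5, scoring='f1')
grid_v2.fit(X_train, y_train)
print("Best Hyperparameters:", grid_v2.best_params_)
print("Best cross-validated F1:", grid_v2.best_score_)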
Step 4: Visualizing and Interpreting the CART (Decision Tree)¶
- Understanding Feature Importance
- Visualizing the Tree
4.1 Understanding Feature Importance¶
Let's take a look at the most important features.
- Importance of Feature Importances: Feature importance scores quantify how much each predictor contributes to improving the model's performance (or, equivalently, its capacity to reduce the model's impurity). Basically, they tell you which features matter most when making predictions.
- How to interpret: A high importance score indicates that a feature was frequently used for splits and significantly decreased impurity. A low score means the opposite: the feature was rarely used and did little to decrease impurity.
# Import necessary tools
import matplotlib.pyplot as plt
import seaborn as sns
# Displaying and plotting feature importances for the tuned model
importance = tuned_clf.feature_importances_
for feature, score in zip(X.columns, importance):
print(f"Feature: {feature}, Importance: {score:.4f}")
plt.figure(figsize = (10, 6))
sns.barplot(x = importance, y = X.columns)
plt.title("Tuned Model Feature Importances")
plt.xlabel("Importance Score")
plt.ylabel("Feature")
plt.show()
Feature: SeniorCitizen, Importance: 0.0008 Feature: tenure, Importance: 0.1100 Feature: MonthlyCharges, Importance: 0.0655 Feature: TotalCharges, Importance: 0.0432 Feature: gender_Male, Importance: 0.0000 Feature: Partner_Yes, Importance: 0.0000 Feature: Dependents_Yes, Importance: 0.0000 Feature: PhoneService_Yes, Importance: 0.0000 Feature: MultipleLines_No phone service, Importance: 0.0030 Feature: MultipleLines_Yes, Importance: 0.0000 Feature: InternetService_Fiber optic, Importance: 0.0979 Feature: InternetService_No, Importance: 0.0000 Feature: OnlineSecurity_No internet service, Importance: 0.0000 Feature: OnlineSecurity_Yes, Importance: 0.0000 Feature: OnlineBackup_No internet service, Importance: 0.0000 Feature: OnlineBackup_Yes, Importance: 0.0002 Feature: DeviceProtection_No internet service, Importance: 0.0000 Feature: DeviceProtection_Yes, Importance: 0.0000 Feature: TechSupport_No internet service, Importance: 0.0000 Feature: TechSupport_Yes, Importance: 0.0062 Feature: StreamingTV_No internet service, Importance: 0.0000 Feature: StreamingTV_Yes, Importance: 0.0000 Feature: StreamingMovies_No internet service, Importance: 0.0175 Feature: StreamingMovies_Yes, Importance: 0.0423 Feature: Contract_One year, Importance: 0.2160 Feature: Contract_Two year, Importance: 0.3724 Feature: PaperlessBilling_Yes, Importance: 0.0018 Feature: PaymentMethod_Credit card (automatic), Importance: 0.0000 Feature: PaymentMethod_Electronic check, Importance: 0.0231 Feature: PaymentMethod_Mailed check, Importance: 0.0000
This plot illustrates the value of understanding and visualizing feature importances. We can see that the type of Contract is a significant factor (customers on longer contracts churn less), that tenure is important, and that whether the customer has fiber optic internet also matters (perhaps these customers churn more due to higher costs or stronger competition).
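One small readability tweak (a sketch, not a required step): sort the importances and drop the zero-importance features before plotting, so the most influential features appear at the top.
# Rank the importances and keep only the features the tree actually used
importances = pd.Series(tuned_clf.feature_importances_, index=X.columns)
top_features = importances[importances > 0].sort_values(ascending=False)

plt.figure(figsize=(10, 6))
sns.barplot(x=top_features.values, y=top_features.index)
plt.title("Tuned Model Feature Importances (sorted, non-zero only)")
plt.xlabel("Importance Score")
plt.show()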
Now that we know which features make the most impact on predicting customer churn, we should look into what they mean.
Before we get into that, let's take a look at the CART (decision tree) model itself.
4.2 Visualizing the Tree¶
- sklearn
- dtreeviz
- export_text
The Importance of Visualization and Readability:
Why Visualize?
Visualization is a critical part of data analysis because it transforms complex models into intuitive, graphical representations. With decision trees, visualization makes the model’s logic explicit by displaying the decision rules as branches, making it easier for both data scientists and non-technical stakeholders to understand the decision-making process.
Readability of CART Decision Trees
Decision trees are inherently interpretable—each split represents a clear if-then rule. This transparency is a major reason why they are popular in fields like finance and marketing.
4.2.1 sklearn¶
Let's try an initial visualization using scikit-learn's plot_tree() function:
# import necessary tools
from sklearn.tree import plot_tree
# Visualizing the CART decision tree
plt.figure(figsize = (30,16))
plot_tree(tuned_clf, feature_names = X.columns,
class_names = ["No Churn", "Churn"],
filled = True, rounded = True, fontsize = 12)
plt.title("Tuned CART Decision Tree")
plt.show()
While this tree gives us all the information we need about how the nodes split and which features are used, it can be dense and hard to read, especially when we have max_depth = 6.
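One quick mitigation within scikit-learn itself: plot_tree accepts a max_depth argument, so you can draw just the top few levels and let the deeper branches be truncated. A minimal sketch:
# Draw only the first three levels of the tuned tree for readability
plt.figure(figsize=(20, 10))
plot_tree(tuned_clf, feature_names=X.columns,
          class_names=["No Churn", "Churn"],
          filled=True, rounded=True, fontsize=10, max_depth=3)
plt.title("Tuned CART Decision Tree (top 3 levels)")
plt.show()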
4.2.2 dtreeviz¶
One way to address this readability issue is to use dtreeviz. This tool generates interactive, high-resolution visualizations with enhanced clarity:
- It displays detailed node information, such as the distribution of class probabilities, number of samples, and impurity.
- It uses intuitive graphical elements (like pie charts and color gradients) to represent splits and node statistics.
dtreeviz was developed to make decision tree outputs more accessible and interpretable—it has been used in both academic and industry settings to effectively communicate model decisions.
For dtreeviz version 2.0 and later, the syntax is:
!pip install --upgrade dtreeviz
import logging
import warnings
import dtreeviz
warnings.filterwarnings("ignore", message=".*X does not have valid feature names.*")
logging.getLogger('matplotlib.font_manager').setLevel(logging.ERROR)
logging.getLogger('matplotlib.axes._base').setLevel(logging.ERROR)
Collecting dtreeviz Downloading dtreeviz-2.2.2-py3-none-any.whl.metadata (2.4 kB) Requirement already satisfied: graphviz>=0.9 in /usr/local/lib/python3.11/dist-packages (from dtreeviz) (0.20.3) Requirement already satisfied: pandas in /usr/local/lib/python3.11/dist-packages (from dtreeviz) (2.2.2) Requirement already satisfied: numpy in /usr/local/lib/python3.11/dist-packages (from dtreeviz) (1.26.4) Requirement already satisfied: scikit-learn in /usr/local/lib/python3.11/dist-packages (from dtreeviz) (1.6.1) Requirement already satisfied: matplotlib in /usr/local/lib/python3.11/dist-packages (from dtreeviz) (3.10.0) Requirement already satisfied: colour in /usr/local/lib/python3.11/dist-packages (from dtreeviz) (0.1.5) Requirement already satisfied: pytest in /usr/local/lib/python3.11/dist-packages (from dtreeviz) (8.3.4) Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib->dtreeviz) (1.3.1) Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.11/dist-packages (from matplotlib->dtreeviz) (0.12.1) Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.11/dist-packages (from matplotlib->dtreeviz) (4.56.0) Requirement already satisfied: kiwisolver>=1.3.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib->dtreeviz) (1.4.8) Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.11/dist-packages (from matplotlib->dtreeviz) (24.2) Requirement already satisfied: pillow>=8 in /usr/local/lib/python3.11/dist-packages (from matplotlib->dtreeviz) (11.1.0) Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib->dtreeviz) (3.2.1) Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.11/dist-packages (from matplotlib->dtreeviz) (2.8.2) Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas->dtreeviz) (2025.1) Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas->dtreeviz) (2025.1) Requirement already satisfied: iniconfig in /usr/local/lib/python3.11/dist-packages (from pytest->dtreeviz) (2.0.0) Requirement already satisfied: pluggy<2,>=1.5 in /usr/local/lib/python3.11/dist-packages (from pytest->dtreeviz) (1.5.0) Requirement already satisfied: scipy>=1.6.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn->dtreeviz) (1.13.1) Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn->dtreeviz) (1.4.2) Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn->dtreeviz) (3.5.0) Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.7->matplotlib->dtreeviz) (1.17.0) Downloading dtreeviz-2.2.2-py3-none-any.whl (91 kB) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 91.8/91.8 kB 2.3 MB/s eta 0:00:00 Installing collected packages: dtreeviz Successfully installed dtreeviz-2.2.2
#Make sure that all missing values have been replaced with 0
X_train = X_train.fillna(0)
X_test = X_test.fillna(0)
# Now create the visualization using the new API:
m = dtreeviz.model(tuned_clf, X_train, y_train,
feature_names = X.columns,
class_names = ["No Churn", "Churn"])
m.view()
Starting from the top:
- We have the root node, which asks the big question using the most important feature identified by the model, Contract_Two year:
  - If we take a look at the root node, it's labeled Contract_Two year; this is the feature the model uses to ask the question.
  - The black triangle (labeled 0.5) represents the question being asked ("Does the customer have a two-year contract?"). If the customer does not have a two-year contract, they are coded '0', which means 'No' (since 0 is less than 0.5, the data goes left). Likewise, if the customer has a two-year contract, they are coded '1', which means 'Yes' (since 1 is greater than 0.5, the data goes right).
  - This logic applies to every split until we reach a leaf node.
- The pie charts at the leaf nodes indicate the proportion of classes in the predictions ("Churn" vs. "No Churn"). Higher proportions of the "Churn" class highlight opportunities to target these segments of customers with retention campaigns.
- The closer the features are to the top of the tree, the more impactful they are to predictions.
Learn more about dtreeviz here
4.2.3 export_text¶
Scikit-learn also allows us to just print the questions asked by the CART model at each node.
# import necessary tools
from sklearn.tree import export_text
# Using export_text to extract the questions asked at each node
tree_rules = export_text(tuned_clf, feature_names = X.columns)
print("Decision Tree Rules:", tree_rules)
Decision Tree Rules: |--- Contract_Two year <= 0.50 | |--- Contract_One year <= 0.50 | | |--- InternetService_Fiber optic <= 0.50 | | | |--- tenure <= 5.50 | | | | |--- StreamingMovies_No internet service <= 0.50 | | | | | |--- TechSupport_Yes <= 0.50 | | | | | | |--- class: 1 | | | | | |--- TechSupport_Yes > 0.50 | | | | | | |--- class: 1 | | | | |--- StreamingMovies_No internet service > 0.50 | | | | | |--- TotalCharges <= 20.93 | | | | | | |--- class: 1 | | | | | |--- TotalCharges > 20.93 | | | | | | |--- class: 0 | | | |--- tenure > 5.50 | | | | |--- MonthlyCharges <= 29.18 | | | | | |--- MonthlyCharges <= 19.43 | | | | | | |--- class: 0 | | | | | |--- MonthlyCharges > 19.43 | | | | | | |--- class: 0 | | | | |--- MonthlyCharges > 29.18 | | | | | |--- MonthlyCharges <= 64.78 | | | | | | |--- class: 0 | | | | | |--- MonthlyCharges > 64.78 | | | | | | |--- class: 0 | | |--- InternetService_Fiber optic > 0.50 | | | |--- tenure <= 16.50 | | | | |--- TotalCharges <= 120.00 | | | | | |--- TechSupport_Yes <= 0.50 | | | | | | |--- class: 1 | | | | | |--- TechSupport_Yes > 0.50 | | | | | | |--- class: 1 | | | | |--- TotalCharges > 120.00 | | | | | |--- MonthlyCharges <= 70.60 | | | | | | |--- class: 1 | | | | | |--- MonthlyCharges > 70.60 | | | | | | |--- class: 1 | | | |--- tenure > 16.50 | | | | |--- PaymentMethod_Electronic check <= 0.50 | | | | | |--- StreamingMovies_Yes <= 0.50 | | | | | | |--- class: 0 | | | | | |--- StreamingMovies_Yes > 0.50 | | | | | | |--- class: 1 | | | | |--- PaymentMethod_Electronic check > 0.50 | | | | | |--- TotalCharges <= 3308.22 | | | | | | |--- class: 1 | | | | | |--- TotalCharges > 3308.22 | | | | | | |--- class: 1 | |--- Contract_One year > 0.50 | | |--- StreamingMovies_Yes <= 0.50 | | | |--- PaymentMethod_Electronic check <= 0.50 | | | | |--- TotalCharges <= 37.08 | | | | | |--- class: 0 | | | | |--- TotalCharges > 37.08 | | | | | |--- tenure <= 48.50 | | | | | | |--- class: 0 | | | | | |--- tenure > 48.50 | | | | | | |--- class: 0 | | | |--- PaymentMethod_Electronic check > 0.50 | | | | |--- TotalCharges <= 5626.67 | | | | | |--- PaperlessBilling_Yes <= 0.50 | | | | | | |--- class: 0 | | | | | |--- PaperlessBilling_Yes > 0.50 | | | | | | |--- class: 0 | | | | |--- TotalCharges > 5626.67 | | | | | |--- class: 1 | | |--- StreamingMovies_Yes > 0.50 | | | |--- MonthlyCharges <= 94.17 | | | | |--- TotalCharges <= 3266.25 | | | | | |--- TotalCharges <= 2984.07 | | | | | | |--- class: 0 | | | | | |--- TotalCharges > 2984.07 | | | | | | |--- class: 1 | | | | |--- TotalCharges > 3266.25 | | | | | |--- MonthlyCharges <= 88.60 | | | | | | |--- class: 0 | | | | | |--- MonthlyCharges > 88.60 | | | | | | |--- class: 0 | | | |--- MonthlyCharges > 94.17 | | | | |--- TotalCharges <= 6634.62 | | | | | |--- tenure <= 19.50 | | | | | | |--- class: 1 | | | | | |--- tenure > 19.50 | | | | | | |--- class: 1 | | | | |--- TotalCharges > 6634.62 | | | | | |--- MonthlyCharges <= 105.62 | | | | | | |--- class: 0 | | | | | |--- MonthlyCharges > 105.62 | | | | | | |--- class: 1 |--- Contract_Two year > 0.50 | |--- MonthlyCharges <= 92.42 | | |--- PaymentMethod_Electronic check <= 0.50 | | | |--- SeniorCitizen <= 0.50 | | | | |--- OnlineBackup_Yes <= 0.50 | | | | | |--- MonthlyCharges <= 24.48 | | | | | | |--- class: 0 | | | | | |--- MonthlyCharges > 24.48 | | | | | | |--- class: 0 | | | | |--- OnlineBackup_Yes > 0.50 | | | | | |--- class: 0 | | | |--- SeniorCitizen > 0.50 | | | | |--- tenure <= 66.50 | | | | | |--- tenure <= 63.00 | | | | | | |--- class: 0 | | | | | |--- tenure > 63.00 
| | | | | | |--- class: 1 | | | | |--- tenure > 66.50 | | | | | |--- TechSupport_Yes <= 0.50 | | | | | | |--- class: 0 | | | | | |--- TechSupport_Yes > 0.50 | | | | | | |--- class: 0 | | |--- PaymentMethod_Electronic check > 0.50 | | | |--- MultipleLines_No phone service <= 0.50 | | | | |--- TotalCharges <= 5265.40 | | | | | |--- class: 0 | | | | |--- TotalCharges > 5265.40 | | | | | |--- class: 0 | | | |--- MultipleLines_No phone service > 0.50 | | | | |--- class: 1 | |--- MonthlyCharges > 92.42 | | |--- MonthlyCharges <= 96.30 | | | |--- TotalCharges <= 6628.53 | | | | |--- tenure <= 69.50 | | | | | |--- MonthlyCharges <= 95.22 | | | | | | |--- class: 0 | | | | | |--- MonthlyCharges > 95.22 | | | | | | |--- class: 1 | | | | |--- tenure > 69.50 | | | | | |--- class: 1 | | | |--- TotalCharges > 6628.53 | | | | |--- class: 0 | | |--- MonthlyCharges > 96.30 | | | |--- TotalCharges <= 7699.30 | | | | |--- TotalCharges <= 7689.88 | | | | | |--- MonthlyCharges <= 103.30 | | | | | | |--- class: 0 | | | | | |--- MonthlyCharges > 103.30 | | | | | | |--- class: 0 | | | | |--- TotalCharges > 7689.88 | | | | | |--- class: 1 | | | |--- TotalCharges > 7699.30 | | | | |--- tenure <= 68.50 | | | | | |--- class: 0 | | | | |--- tenure > 68.50 | | | | | |--- gender_Male <= 0.50 | | | | | | |--- class: 0 | | | | | |--- gender_Male > 0.50 | | | | | | |--- class: 0
They can be interpreted the same way as the graphs above. We have at the very top the first 'question', Contract_Two year <= 0.50. If the answer is 'yes' (they are not on a two-year contract, i.e. value 0), the tree goes to the left, which then checks if they are on a one-year contract, etc. Essentially, if a customer is on a month-to-month contract (both two-year and one-year flags are 0), the tree goes deeper into checking tenure and charges to predict churn. If they are on a long-term contract, the tree likely goes another way (probably predicting no churn or checking other factors).
This aligns with marketing intuition: customers on month-to-month plans are more at risk of churn (because they have easy opportunities to leave each month). Those locked in a contract churn less (at least until their contract ends).
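If the full rule dump above feels overwhelming, export_text also takes a max_depth argument, so you can print just the top-level rules as a quick summary:
# Print only the first couple of levels of decision rules
short_rules = export_text(tuned_clf, feature_names=list(X.columns), max_depth=2)
print(short_rules)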
Step 5: Results and Marketing Insights¶
Now, let’s step back for a second—what does all this mean for you as a marketer? The CART model we've built isn’t just numbers and technical jargon; it’s actually a crystal ball that helps us predict who might leave us, and why. Here are some practical takeaways:
Contract Type is Key: Customers on month-to-month contracts (versus one-year or two-year contracts) are the most likely to leave. This makes sense: no long-term commitment makes switching easier.
- Marketing Action: Focus retention efforts on month-to-month customers. Nudge these customers toward longer-term plans with discounts or special perks. Making it attractive to commit will reduce churn.
Tenure (Time with Company): Newer customers need extra love. Those in their first few months are still deciding if the service is right for them.
- Marketing Action: A warm welcome goes a long way. Think personalized onboarding emails, friendly check-in calls, or special promotions for the first few months. Make the first impression count.
Monthly Charges: Customers paying high monthly charges could be price-sensitive, making them more likely to jump ship.
- Marketing Action: Show these customers exactly what they're getting for their money. Offer loyalty discounts or clearly highlight premium features that justify the price.
Services and Support (Matters more than You Think): Our model reveals that customers without tech support are at higher risk of leaving—likely because when problems occur, they're left frustrated and unsupported.
- Marketing Action: Market the value of tech support or include a basic support service in standard plans to reduce frustration. If fiber optic users churn more, investigate if it’s due to service issues or competition, and address that with network improvements or competitive pricing.
Not all Customers Churn Equally: Recognize that the most important features don't tell the full story. Sometimes family status, having a partner, or even payment methods can influence churn rates. For example, electronic-check customers might be more likely to leave due to hassles in payment.
- Marketing Action: If possible, target these specific groups with customized communication. Maybe older customers using electronic checks need simpler payment options or clearer instructions to reduce friction.
Let’s get even more specific:
Our tree reveals that a “Month-to-month” contract combined with no tech support is a particularly lethal combination for churn; a "high-risk" segment. We can double-check this insight by turning to our original dataset.
# Analyze churn rate for specific segment: month-to-month & no tech support
segment = data[(data['Contract']=='Month-to-month') & (data['TechSupport']=='No')]
print("Churn rate for Month-to-Month + No Tech Support customers:",
(segment['Churn']=='Yes').mean())
Churn rate for Month-to-Month + No Tech Support customers: 0.503731343283582
This churn rate is 50.4%, much higher than the overall average of ~26%. Roughly one out of every two customers with these features leaves. That's a major insight: this segment of customers needs immediate attention!
Communicating to Stakeholders:
When you present these insights to your team, stakeholders, or executives, keep it straightforward. For instance, you might say:
“Our model can predict churn with about 71.2% accuracy and 76.7% recall. It identifies roughly 3 out of 4 would-be churners, giving us a chance to intervene early.”
“The biggest reasons people leave are easy-to-exit contracts, being new customers (who are not fully convinced yet), and not having tech support.”
“Our best move would be to target retention campaigns—increasing the attractiveness of switching to annual contracts or including free support for customers.”
Step 6: Conclusion and Next Steps¶
Congratulations on completing the churn prediction activity! You’ve not only built a predictive model, but also interpreted it in a marketing context. Let's quickly recap what you've accomplished:
- Getting to Know Your Data: You've checked your data, cleaned it, and made sure it's ready for analysis. This step sets the foundation for everything that follows.
- Model Training and Tuning: You started with a simple CART (decision tree), then fine-tuned it to make good decisions.
- Evaluating the Model: You learned how accuracy isn't everything. Metrics like recall and precision tell you if the model really helps you predict churn rate.
- Turning Numbers Into Action: And the most crucial step—figuring out what the data means and how we can take advantage in a marketing context.
Remember, the real value here lies not just in building models, but in translating their insights into smart marketing decisions. Knowing why customers churn helps you craft better experiences, keep your customers happy, and, as a consequence, increase shareholder value.
Next Steps:
Try advanced models: Decision trees are great for clarity, but if you’re looking for even more accurate predictions, you can explore advanced models like Random Forests or Gradient Boosting. They might be more accurate but slightly harder to interpret. However, tools like SHAP values help explain these complex models clearly.
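As a taste, here is a minimal sketch of swapping in a Random Forest on the same train/test split. The settings shown (n_estimators, class_weight) are illustrative rather than tuned, so your numbers will differ from the CART results above.
# A quick, untuned Random Forest on the same training/testing split
from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier(n_estimators=200, class_weight='balanced', random_state=707)
rf.fit(X_train, y_train)
rf_pred = rf.predict(X_test)

print("RF Accuracy: ", accuracy_score(y_test, rf_pred))
print("RF Precision:", precision_score(y_test, rf_pred))
print("RF Recall:   ", recall_score(y_test, rf_pred))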
Think about value, not just churn: Not all customers are created equal—some bring more revenue than others. If you have customer value data, why not tweak the model to prioritize keeping your most valuable customers? It’s like focusing extra attention on your VIPs.
Put the model into action: In the real world, you'd automate this model to regularly check your customer base and identify who’s likely to churn in advance. Imagine a system that flags at-risk customers each month, allowing your marketing team to step in before it's too late.
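For a sense of what that scoring job might look like, here is a minimal sketch that ranks the held-out customers by our tuned tree's predicted churn probability and flags the riskiest ones (the 70% cutoff is purely illustrative):
# Rank customers by predicted churn probability and flag the riskiest ones
churn_prob = tuned_clf.predict_proba(X_test)[:, 1]   # P(churn) for each customer

risk_report = X_test.copy()
risk_report['churn_probability'] = churn_prob
high_risk = risk_report[risk_report['churn_probability'] > 0.70]  # illustrative cutoff

print(f"{len(high_risk)} customers flagged for the retention team")
print(high_risk[['tenure', 'MonthlyCharges', 'churn_probability']]
      .sort_values('churn_probability', ascending=False).head())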
Keep your model fresh: Customer preferences evolve, and new competitors emerge. It’s important to regularly retrain and update your model so it stays relevant. Think of it like regularly updating your apps or refreshing your playlists.
By incorporating these improvements and considerations, this activity has hopefully become a comprehensive learning resource. You’ve seen not just the how of building a churn model, but also the why at each step, all grounded in real-world marketing relevance. Keep exploring and happy analyzing!