Python Analytics for Business Intelligence¶

From Insight to Action¶

Do you know what your data is trying to tell you?¶

Dr. Andrés García Medina

andgarm.n@gmail.com

https://sites.google.com/view/andresgm/home

In this session, you’ll learn how to:

  • Use logistic regression to predict key business outcomes

  • Apply clustering techniques to segment and understand your customers

  • Overcome the challenge of uncertainty in decision-making

  • See how real-world dashboards turn complex data into clear insights

Module 1: Logistic Regression for Credit Risk Analysis and Customer Churn¶

Introduction to Classification Models in Business Analytics¶

In business analytics problems, we often need to predict a categorical variable:

  • Will a customer pay a loan or not?
  • Will a customer churn or stay?

Classification models allow you to assign probabilities to each class and make decisions based on a threshold (e.g., 0.5).
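
As a minimal sketch (with made-up probabilities, not output from any model in this notebook), thresholding turns predicted probabilities into yes/no decisions:

import numpy as np

# Hypothetical predicted probabilities for five customers (illustrative values)
probs = np.array([0.12, 0.47, 0.50, 0.63, 0.91])

# Classify as 1 (e.g., "default" or "churn") when the probability reaches the threshold
threshold = 0.5
decisions = (probs >= threshold).astype(int)
print(decisions)  # [0 0 1 1 1]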

Business application examples:

  • Credit risk: Identify customers with a high probability of defaulting.
  • Customer retention: Anticipate churn to design preventive strategies.

Fundamentals of Logistic Regression¶

Logistic regression is a binary classification model. The idea is to model the probability of an event:

$ P(Y=1|X) = \frac{1}{1 + e^{-(\beta_0 + \beta_1 X_1 + ... + \beta_p X_p)}} $

  • The sigmoid function transforms any real value into a number between 0 and 1.
  • The logit is the transformation:

$ \text{logit}(p) = \ln\left(\frac{p}{1-p}\right) = \beta_0 + \beta_1 X_1 + ... + \beta_p X_p $

Interpretation of coefficients:

  • $ \beta_j $ indicates the change in the log-odds (logit) per additional unit of $ X_j $; $ e^{\beta_j} $ is the corresponding odds ratio (a short numerical sketch follows).
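
As a quick numerical sketch (the linear score below is illustrative, not taken from any fitted model), the sigmoid maps a linear score to a probability and the logit transformation inverts it:

import numpy as np

def sigmoid(z):
    # Maps any real value into the interval (0, 1)
    return 1 / (1 + np.exp(-z))

def logit(p):
    # Inverse of the sigmoid: log-odds of p
    return np.log(p / (1 - p))

z = 0.4            # illustrative linear score beta0 + beta1*x1 + ... + betap*xp
p = sigmoid(z)     # probability of the event
print(round(p, 3), round(logit(p), 3))  # logit(p) recovers z (up to rounding)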
In [1]:
# Importing necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

Example: Credit Default Risk Prediction¶

Let's simulate a customer dataset with the following variables:

  • Income: The annual income of the client

  • Credit Amount: The amount of credit the client is requesting or has taken.

  • Arrears: The number of previous late payments.

  • Default (1 = defaulted, 0 = paid)

In [2]:
# Data Simulation
np.random.seed(42)
n = 500

df_credit = pd.DataFrame({
    "Income": np.random.normal(50000, 15000, n),
    "Credit_Amount": np.random.normal(20000, 5000, n),
    "Arrears": np.random.randint(0, 5, n)
})


beta0 = -5
beta1 = 0.0005
beta2 = -0.00004
beta3 = 0.8

# Probability of Default
p_default = 1 / (1 + np.exp(-(beta0
                              + beta1*df_credit['Credit_Amount']
                              + beta2*df_credit['Income']
                              + beta3*df_credit['Arrears'])))
df_credit["Default"] = np.random.binomial(1, p_default)

# Save the simulated credit data to a CSV file
df_credit.to_csv("credit_data.csv", index=False)
In [3]:
#Open data
df_credit = pd.read_csv('credit_data.csv')
df_credit.head()
Out[3]:
         Income  Credit_Amount  Arrears  Default
0  57450.712295   24630.887738        3        1
1  47926.035482   29547.083202        0        1
2  59715.328072   13007.162131        2        0
3  72845.447846   22814.846183        4        1
4  46487.699379   16746.787154        2        1
In [4]:
# Descriptive Statistics

df_credit.describe()
Out[4]:
              Income  Credit_Amount     Arrears     Default
count     500.000000     500.000000  500.000000  500.000000
mean    50102.569919   20159.130585    2.014000    0.898000
std     14718.798710    4889.985976    1.416268    0.302951
min      1380.989899    6515.566785    0.000000    0.000000
25%     39495.388933   17023.541301    1.000000    1.000000
50%     50191.957195   20142.657998    2.000000    1.000000
75%     59551.748812   23256.211488    3.000000    1.000000
max    107790.972360   33161.910324    4.000000    1.000000
In [5]:
# Estimate parameters of the logistic regression model using the data
X = df_credit[["Income", "Credit_Amount", "Arrears"]]
y = df_credit["Default"]

model_credit_in = LogisticRegression()
model_credit_in.fit(X, y)

# Display the parameters (in-sample)
print("Intercept:", model_credit_in.intercept_[0])
print("Coefficients:", model_credit_in.coef_[0])
Intercept: -4.915706515840034
Coefficients: [-7.81822335e-05  6.20387121e-04  6.86068503e-01]

How to use the regression equation for prediction?¶

Consider a client with an annual salary of $\$60,000$, a credit amount of $\$15,000$, and one arrear.

For this particular client, what is the probability of default?

We can use the estimated equation:

$ P(Y=1|X) = \frac{1}{1 + e^{-(\beta_0 + \beta_1 X_1 + \beta_2 X_2 + \beta_3 X_3)}} $

or type in Python:

In [6]:
# Define a sample client's data (Income, Credit_Amount, Arrears)
sample_client = pd.DataFrame({
    "Income": [60000],
    "Credit_Amount": [15000],
    "Arrears": [1]
})

# Predict the probability of default for the sample client
probability_default_client = model_credit_in.predict_proba(sample_client)[:, 1]

# Print the predicted probability
print(f"The predicted probability of default for the sample client is: {probability_default_client[0]:.4f}")
The predicted probability of default for the sample client is: 0.5951
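
As a sanity check (assuming the fitted model_credit_in and the sample_client DataFrame from the cells above), the same probability can be obtained by plugging the estimated coefficients into the sigmoid by hand:

import numpy as np

# Manually evaluate the logistic equation with the fitted parameters
z = (model_credit_in.intercept_[0]
     + np.dot(model_credit_in.coef_[0], sample_client.iloc[0].values))
p_manual = 1 / (1 + np.exp(-z))
print(f"Manual probability of default: {p_manual:.4f}")  # should match predict_proba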

Odds Ratios¶

Odds ratios quantify the change in the odds of the outcome (default in this case) for a one-unit increase in a predictor variable, holding all other predictors constant.

  • An odds ratio of 1 means the predictor has no effect on the odds of the outcome.
  • An odds ratio greater than 1 means the predictor increases the odds of the outcome.
  • An odds ratio less than 1 means the predictor decreases the odds of the outcome.
In [7]:
# Compute odds ratios for the credit risk model
odds_ratios_credit = pd.DataFrame({
    'Feature': X.columns,
    'Odds Ratio': np.exp(model_credit_in.coef_[0])
})

print("Odds Ratios - Credit Risk Model:")
display(odds_ratios_credit)
Odds Ratios - Credit Risk Model:
         Feature  Odds Ratio
0         Income    0.999922
1  Credit_Amount    1.000621
2        Arrears    1.985893

Interpretation of Odds Ratios for Credit Risk¶

Based on this dataset and model, the odds ratios suggest that a customer's payment history (Arrears) matters far more for assessing credit risk than their current income or the amount of credit they are seeking:

  • Income: The odds ratio for Income is very close to 1 (0.9999). This suggests that a one-unit increase in income has a negligible effect on the odds of defaulting, holding other factors constant.
  • Credit_Amount: The odds ratio for Credit_Amount is slightly greater than 1 (1.0006). This means that for every one-unit increase in the credit amount, the odds of defaulting increase by a very small amount (approximately 0.06%).
  • Arrears: The odds ratio for Arrears is approximately 1.985. This means that for every one-unit increase in the number of previous late payments (Arrears), the odds of defaulting increase by about 98.6%. This is a substantial increase, indicating that the number of previous arrears is a strong predictor of default risk.

Model training: out-of-sample¶

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

model_credit_out = LogisticRegression()
model_credit_out.fit(X_train, y_train)

# Display the parameters (out-of-sample)
print("Intercept:", model_credit_out.intercept_[0])
print("Coefficients:", model_credit_out.coef_[0])
Intercept: -5.430045539160187
Coefficients: [-9.59264607e-05  7.33997106e-04  6.17921734e-01]
In [9]:
# Make predictions on the test set
y_pred = model_credit_out.predict(X_test)
print(y_pred)
[0 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 0 1 0 1 0 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0 0 1 1 1 1 1 1 1 1 1
 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0 0 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 0
 1 1]
In [10]:
#Confusion Matrix
conf_matrix = confusion_matrix(y_test, y_pred)
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues")
plt.title("Confusion Matrix - Credit Risk")
plt.show()
[Figure: Confusion Matrix - Credit Risk]

Let’s define the key terms first:

  • TP (True Positives): Predicted 1 and actually 1
  • TN (True Negatives): Predicted 0 and actually 0
  • FP (False Positives): Predicted 1 but actually 0
  • FN (False Negatives): Predicted 0 but actually 1

A false positive means you deny credit to someone who would have paid it back. These are missed business opportunities.

A false negative in this context means you grant credit to someone who ends up defaulting. These are direct financial losses.


Precision $ = \frac{TP}{TP + FP} $

Measures how many of the predicted positives are actually positive.
“Of all loans predicted to default, how many really defaulted?”


Recall (Sensitivity, True Positive Rate) $ = \frac{TP}{TP + FN} $

Measures how many of the actual positives were captured.
“Of all loans that actually defaulted, how many did the model detect?”


F1-Score $ = 2 \cdot \frac{\text{Precision} \cdot \text{Recall}}{\text{Precision} + \text{Recall}} $

Harmonic mean of Precision and Recall. Balances both metrics.


Accuracy $ = \frac{TP + TN}{TP + TN + FP + FN} $

Fraction of all predictions that were correct.


Macro Average $ = \frac{1}{N_{classes}} \sum_{i=1}^{N_{classes}} \text{Metric}_i $

Simple average across all classes, treating each class equally (ignores imbalance).


Weighted Average $ = \frac{\sum_{i=1}^{N_{classes}} \text{Support}_i \cdot \text{Metric}_i}{\sum_{i=1}^{N_{classes}} \text{Support}_i} $

Average across classes, weighted by the number of instances in each class (accounts for imbalance).
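
As a worked sketch, these formulas can be evaluated by hand from the four confusion-matrix counts; the numbers below are chosen to be consistent with the classification report shown in the next cell, but treat them purely as an illustration:

# Recompute the metrics for the positive class (Default = 1) from raw counts
TN, FP, FN, TP = 14, 8, 5, 123

precision = TP / (TP + FP)
recall = TP / (TP + FN)
f1 = 2 * precision * recall / (precision + recall)
accuracy = (TP + TN) / (TP + TN + FP + FN)

print(f"Precision (class 1): {precision:.2f}")  # ~0.94
print(f"Recall    (class 1): {recall:.2f}")     # ~0.96
print(f"F1-score  (class 1): {f1:.2f}")         # ~0.95
print(f"Accuracy:            {accuracy:.2f}")   # ~0.91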

In [11]:
# Classification report
print(classification_report(y_test, y_pred))

#Note: The support number shows how many data points are considered for
#      calculating the metrics for each class
#      see https://www.nb-data.com/p/breaking-down-the-classification
              precision    recall  f1-score   support

           0       0.74      0.64      0.68        22
           1       0.94      0.96      0.95       128

    accuracy                           0.91       150
   macro avg       0.84      0.80      0.82       150
weighted avg       0.91      0.91      0.91       150

Interpretation of classification metrics for Credit risk¶

  • With a recall of 0.96 for the default class, the model is strongly oriented towards minimizing false negatives (missing actual defaulters), which is a common strategy in credit risk to protect against losses.

Example: Customer Churn Prediction¶

Churn rate in business refers to the number of customers or subscribers that leave a provider in a given time period.

Now, a case of customer retention:

  • Customer age
  • Months with the company
  • Number of customer service calls
  • Churn (1 = churned, 0 = stayed)
In [12]:
# Churn data simulation
np.random.seed(123)
n = 400

age = np.random.randint(18, 70, n)
months = np.random.randint(1, 60, n)
calls = np.random.poisson(2, n)

p_churn = 1 / (1 + np.exp(-( -3 + 0.1*calls -0.03*months + 0.1*age )))
churn = np.random.binomial(1, p_churn)

df_churn = pd.DataFrame({
    "Age": age,
    "Months": months,
    "Calls": calls,
    "Churn": churn
})
In [13]:
df_churn.head()
Out[13]:
   Age  Months  Calls  Churn
0   63      33      0      1
1   20      15      1      0
2   46       3      2      1
3   52      28      2      1
4   56      27      1      1
In [14]:
# Model training
X = df_churn[["Age", "Months", "Calls"]]
y = df_churn["Churn"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=123)

model_churn = LogisticRegression()
model_churn.fit(X_train, y_train)

y_pred = model_churn.predict(X_test)
print(classification_report(y_test, y_pred))
              precision    recall  f1-score   support

           0       0.67      0.62      0.65        53
           1       0.72      0.76      0.74        67

    accuracy                           0.70       120
   macro avg       0.70      0.69      0.69       120
weighted avg       0.70      0.70      0.70       120

In [15]:
conf_matrix = confusion_matrix(y_test, y_pred)
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Oranges")
plt.title("Confusion Matrix - Customer Churn")
plt.show()
[Figure: Confusion Matrix - Customer Churn]
In [16]:
# Compute odds ratios for the churn model
odds_ratios_churn = pd.DataFrame({
    'Feature': X.columns,
    'Odds Ratio': np.exp(model_churn.coef_[0])
})

print("Odds Ratios - Customer Churn Model:")
display(odds_ratios_churn)
Odds Ratios - Customer Churn Model:
  Feature  Odds Ratio
0     Age    1.114219
1  Months    0.980445
2   Calls    1.111131

Interpreting the Odds Ratios for the Churn Model:

  • Age: The odds ratio for Age is approximately 1.114. This means that for every one-year increase in customer age, the odds of churning increase by about 11.4%. Older customers have a slightly higher propensity to churn, holding other factors constant.
  • Months: The odds ratio for Months is approximately 0.980. This means that for every one-month increase in the time a customer has been with the company, the odds of churning decrease by about 2%. Customers who have been with the company longer are slightly less likely to churn.
  • Calls: The odds ratio for Calls is approximately 1.111. This means that for every one-unit increase in the number of customer service calls, the odds of churning increase by about 11.1%. Customers who make more service calls are more likely to churn.

These odds ratios provide actionable insights (a short scoring sketch follows this list):

  • Age: While the effect is small, it suggests that slightly older customers might require targeted retention efforts.
  • Months: Customers who have been with the company for a shorter period are more likely to churn. This highlights the importance of onboarding and early engagement strategies.
  • Calls: The number of service calls is a significant predictor of churn. This indicates that customers with frequent issues or who require more support are at higher risk. Businesses should investigate the root causes of these calls and improve service quality or provide better self-service options. Addressing these issues could be a key factor in reducing churn.
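
To turn these insights into action, a hedged sketch (assuming the df_churn data and the fitted model_churn from the cells above) is to score every customer and hand the highest-risk ones to the retention team:

# Score all customers with the churn model and rank them by predicted risk
churn_scores = df_churn[["Age", "Months", "Calls"]].copy()
churn_scores["Churn_Probability"] = model_churn.predict_proba(churn_scores)[:, 1]

# The ten customers to prioritize for retention outreach
top_risk = churn_scores.sort_values("Churn_Probability", ascending=False).head(10)
print(top_risk)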

Predict the probability of churn for a particular profile of a client¶

In [17]:
# Define a sample client's data (Age, Months, Calls)
sample_client = pd.DataFrame({
    "Age": [39],
    "Months": [12],
    "Calls": [3]
})

# Predict the probability of churn for the sample client
probability_churn = model_churn.predict_proba(sample_client)[:, 1]

# Print the predicted probability
print(f"The predicted probability of churn for the sample client is: {probability_churn[0]:.4f}")
The predicted probability of churn for the sample client is: 0.6806

Module Conclusions¶

  • Logistic regression is a powerful tool for binary classification problems in business.
  • It allows you to estimate probabilities, interpret coefficients in terms of odds ratios, and make predictions for new clients.
  • Main applications in this module:
    • Credit risk: predicting payment defaults.
    • Customer churn: identifying users at risk of churn.

Module 2: Customer Segmentation with Clustering and Unsupervised Learning¶

Fundamentals of Clustering and Unsupervised Learning¶

Clustering is an unsupervised learning technique used to group similar data points together, and is widely used in business to:

  • Segment customers based on purchasing behavior.
  • Identify patterns in product usage.
  • Target marketing campaigns more effectively.

Customer Segmentation using K-means¶

K-means clustering partitions the dataset into K clusters, where each data point belongs to the cluster with the nearest mean.

Key concepts:

  • Unsupervised Learning: The algorithm learns patterns without labeled outcomes.
  • Cluster: A group of data points that are more similar to each other than to points in other clusters.

Example: Mall Customer¶

The “Mall Customers” dataset includes the following characteristics:

  • Customer ID: A unique identifier for each customer.
  • Gender: The customer's gender (Male or Female); note that the column is labeled Genre in the CSV file.
  • Age: The customer's age.
  • Annual Income (k$): The customer's approximate annual income, expressed in thousands of dollars.
  • Spending Score (1-100): The score assigned by the shopping center based on customer behavior and spending habits. This score ranges from 1 to 100.
In [18]:
# Importing necessary libraries
from sklearn.cluster import KMeans
In [19]:
customer_data = pd.read_csv('Mall_Customers.csv')
customer_data.head()
Out[19]:
   CustomerID   Genre  Age  Annual Income (k$)  Spending Score (1-100)
0           1    Male   19                  15                      39
1           2    Male   21                  15                      81
2           3  Female   20                  16                       6
3           4  Female   23                  16                      77
4           5  Female   31                  17                      40
In [20]:
# Visualize the data
plt.figure(figsize=(6, 4))
sns.scatterplot(x='Annual Income (k$)', y='Spending Score (1-100)', data=customer_data)
plt.title('Annual Income vs. Spending Score')
plt.xlabel('Annual Income (k$)')
plt.ylabel('Spending Score (1-100)')
plt.show()
[Figure: Annual Income vs. Spending Score scatter plot]

The main steps of the k-means algorithm (a short from-scratch sketch follows this list):

  1. Estimate the number of clusters (K): Choose K using a valid method (e.g., Elbow Method).
  2. Initialize centroids: Randomly place K centroids in the feature space.
  3. Assign points to clusters: Each data point is assigned to the nearest centroid.
  4. Recalculate centroids: Compute the new centroid of each cluster as the average of its assigned points.
  5. Iterate until convergence: Repeat steps 3 and 4 until cluster assignments no longer change significantly.
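
To make steps 3 and 4 concrete, here is a minimal from-scratch sketch of a single assignment/update iteration on synthetic 2-D points (illustrative only; the scikit-learn implementation used below handles initialization and convergence for us):

import numpy as np

rng = np.random.default_rng(0)
points = rng.normal(size=(100, 2))                          # synthetic 2-D data
centroids = points[rng.choice(100, size=3, replace=False)]  # step 2: random init, K = 3

# Step 3: assign each point to its nearest centroid
distances = np.linalg.norm(points[:, None, :] - centroids[None, :, :], axis=2)
labels = distances.argmin(axis=1)

# Step 4: recompute each centroid as the mean of its assigned points
new_centroids = np.array([points[labels == k].mean(axis=0) for k in range(3)])
print(new_centroids)  # repeat steps 3 and 4 until the assignments stop changing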

End-to-end K-means algorithm¶

Elbow Method: Choosing optimal number of clusters¶

Before starting with the K-means algorithm, we need to decide the number of clusters (K) we want to identify.

  • The elbow method and other techniques are used to estimate the number of clusters (K) in the K-means algorithm.
  • These techniques, especially the elbow method, analyze how within-cluster variability, measured as WCSS (Within-Cluster Sum of Squares), changes as the number of clusters increases.
  • The goal is to find a balance where an increase in the number of clusters does not result in a significant improvement in variability (WCSS).

The formula to calculate WCSS (Within-Cluster Sum of Squares) is as follows:

$ WCSS = \sum_{i=1}^{k} \sum_{x \in S_i} \| x - \mu_i \|^2 $

Where:

  • $ k $ is the number of clusters.
  • $ S_i $ represents the set of points in cluster $ i $.
  • $ x $ is a point within cluster $ S_i $.
  • $ \mu_i $ is the centroid of cluster $ i $, i.e., the point that represents the center of the cluster.
  • $ \| x - \mu_i \|^2 $ is the squared distance between point $ x $ and centroid $ \mu_i $, which gives us an idea of how far each point is from its center.

To apply the elbow method here, we will:

  • Run the algorithm 10 times, for K = 1, 2, …, 10.
  • Calculate the within-cluster sum of squares (WCSS) for each K.
  • Plot WCSS against K and look for the point where the trend changes (the "elbow").
In [21]:
import numpy as np
X = customer_data.iloc[:, [3, 4]].values

wcss = []
n_clu = 10
for i in range(1, n_clu+1):
    kmeans = KMeans(n_clusters = i,
                    init = "k-means++",
                    max_iter = 400,
                    n_init = 10,
                    random_state = 42)
    kmeans.fit(X)
    wcss.append(kmeans.inertia_)
In [22]:
plt.plot(range(1,11), wcss, marker='o')
plt.title("Elbow plot")
plt.xlabel("#clusters")
plt.ylabel("WCSS(k)")
plt.grid(True)
plt.show()
[Figure: Elbow plot of WCSS vs. number of clusters]

In this case, K = 5 is a reasonable choice for our dataset: beyond five clusters, the reduction in WCSS becomes marginal, so five clusters strike a good balance between model simplicity and how well the data are grouped.

Implementation of the K-Means Algorithm¶

  • Now that we have the optimal K, we'll organize our dataset X into 5 distinct groups or clusters.

  • The idea is to run the K-means method again with K=5 so that the algorithm finds the 5 best clusters based on the data characteristics.

In [23]:
kmeans = KMeans(n_clusters = 5,
                init="k-means++",
                max_iter = 100,
                n_init = 10,
                random_state = 42)

y_kmeans = kmeans.fit_predict(X)
y_kmeans[0:10]
Out[23]:
array([4, 2, 4, 2, 4, 2, 4, 2, 4, 2], dtype=int32)
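
As a quick consistency check (assuming the X array, the labels y_kmeans, and the fitted kmeans object from the cell above), the WCSS formula can be evaluated by hand and compared with kmeans.inertia_:

# Recompute WCSS manually: sum of squared distances to each point's assigned centroid
centroids = kmeans.cluster_centers_
wcss_manual = sum(np.sum((X[y_kmeans == k] - centroids[k]) ** 2) for k in range(5))
print(wcss_manual, kmeans.inertia_)  # the two values should agree up to rounding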

Integrating Clustering Results into the Dataset¶

After fitting the K-Means model to our data and assigning each sample to a cluster, we must now integrate the results into our original dataset to facilitate more detailed analysis and provide added value to the business.

In [24]:
y_kmeans_series = pd.Series(y_kmeans, name='Cluster Pred')

X_clustered = pd.concat([customer_data, y_kmeans_series], axis=1)
X_clustered.head()
Out[24]:
   CustomerID   Genre  Age  Annual Income (k$)  Spending Score (1-100)  Cluster Pred
0           1    Male   19                  15                      39             4
1           2    Male   21                  15                      81             2
2           3  Female   20                  16                       6             4
3           4  Female   23                  16                      77             2
4           5  Female   31                  17                      40             4

Visualizing Clusters and Centroids¶

After integrating the cluster assignments (Cluster Pred) into our dataset, the next step is to visualize the results to gain an intuitive understanding of how the clusters are distributed based on the selected features.

In [25]:
# Visualizing clusters
plt.figure(figsize=(6,5))
sns.scatterplot(x='Annual Income (k$)', y='Spending Score (1-100)', 
    hue='Cluster Pred', palette='Set2', data=X_clustered, s=100)
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1],
            s=100, c="black", label="Centroid", marker="^")
plt.title("Customer Segments")
plt.show()
[Figure: Customer Segments scatter plot with cluster centroids]

Business Interpretation of Clusters¶

Based on the visualization of the clusters, we can interpret the segments as follows:

  • Cluster 1 (green): Low Income, Low Spending: This group has low annual income and low spending scores. They may be price-sensitive or occasional shoppers.
    • Business Insight: Target with budget-friendly promotions, loyalty programs, and focus on essential goods.
  • Cluster 2 (orange): High Income, High Spending: This group has high annual income and high spending scores. They are likely valuable customers who are willing to spend more.
    • Business Insight: Offer premium products, exclusive deals, personalized recommendations, and excellent customer service.
  • Cluster 3 (blue): Low Income, High Spending: This group has low annual income but high spending scores. They may be impulsive buyers or heavily influenced by promotions and trends.
    • Business Insight: Focus on impulse buys, limited-time offers, and create a sense of urgency.
  • Cluster 4 (pink): High Income, Low Spending: This group has high annual income but low spending scores. They may be saving money or spending elsewhere.
    • Business Insight: Encourage spending with targeted promotions, highlight value and benefits, and consider offering financial services or rewards programs.
  • Cluster 5 (green): Mid Income, Mid Spending: This is the average customer group with moderate income and spending habits.
    • Business Insight: Maintain consistent marketing efforts, offer a balanced mix of products and promotions, and focus on building customer loyalty.

By understanding these distinct customer segments, businesses can tailor their marketing strategies, product offerings, and customer service to better meet the needs and preferences of each group, ultimately leading to increased sales and customer satisfaction.
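
A simple way to back these interpretations with numbers (assuming the X_clustered DataFrame built above) is to profile each cluster by its average age, income, and spending score:

# Average profile of each segment
cluster_profile = (
    X_clustered
    .groupby("Cluster Pred")[["Age", "Annual Income (k$)", "Spending Score (1-100)"]]
    .mean()
    .round(1)
)
display(cluster_profile)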

Key Takeaways¶

  • Clustering is a powerful unsupervised learning technique for customer segmentation.
  • K-means is simple but effective for many business applications.
  • Visualizing clusters provides insight into customer behavior.
  • Combining clustering with classification allows for predictive segmentation and better decision-making.

Module 3: Time Series Analysis for Sales and Demand Forecasting¶

Understanding Time Series Patterns¶

Time series data is a sequence of observations collected over time. Common patterns, illustrated in the short synthetic sketch after this list, include:

  • Trend: Long-term upward or downward movement.
  • Seasonality: Repeating short-term cycle patterns (e.g., monthly sales peaks).
  • Noise: Random fluctuations that cannot be explained by trend or seasonality.
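
A minimal synthetic sketch (purely illustrative, not the restaurant data used below) shows how these three components add up to an observed series:

import numpy as np
import matplotlib.pyplot as plt

months = np.arange(36)
trend = 200 + 2 * months                             # long-term upward movement
seasonality = 20 * np.sin(2 * np.pi * months / 12)   # repeating 12-month cycle
noise = np.random.default_rng(0).normal(0, 5, 36)    # random fluctuations

plt.plot(months, trend + seasonality + noise, marker='o')
plt.title('Synthetic series: trend + seasonality + noise')
plt.show()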

We will illustrate these patterns using a worked example from Camm, Jeffrey D., et al., Statistics for Business and Economics, Cengage, 2024.

Case Study: Forecasting Food and Beverage Sales.

  • The Vintage Restaurant, on Captiva Island near Fort Myers, Florida, is owned and operated by Karen Payne.
  • The restaurant just completed its third year of operation. Since opening her restaurant, Karen has sought to establish a reputation for the Vintage as a high-quality dining establishment that specializes in fresh seafood.
  • Through the efforts of Karen and her staff, her restaurant has become one of the best and fastest growing restaurants on the island.
  • To better plan for future growth of the restaurant, Karen needs to develop a system that will enable her to forecast food and beverage sales by month for up to one year in advance.
In [26]:
df_vintage = pd.read_excel("vintage.xlsx")
df_vintage.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 36 entries, 0 to 35
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   Month   36 non-null     int64
 1   Sales   36 non-null     int64
dtypes: int64(2)
memory usage: 708.0 bytes
In [27]:
df_vintage.head()
Out[27]:
   Month  Sales
0      1    242
1      2    235
2      3    232
3      4    178
4      5    184
In [28]:
plt.figure(figsize=(6, 4))
plt.plot(df_vintage['Month'], df_vintage['Sales'])
plt.title('Vintage Restaurant Monthly Sales')
plt.xlabel('Month')
plt.ylabel('Sales ($1000s)')
plt.grid(True)
plt.show()
[Figure: Vintage Restaurant Monthly Sales line plot]

Moving Average (MA): Smooths data by averaging over a rolling window. Useful for identifying trends.

Exponential Smoothing (ES): Assigns exponentially decreasing weights to past observations. Useful for forecasting with trend and seasonality.

In [29]:
# Moving Average
df_vintage['MA_3'] = df_vintage['Sales'].rolling(window=3).mean()

# Exponential Smoothing
from statsmodels.tsa.holtwinters import ExponentialSmoothing

es_model = ExponentialSmoothing(df_vintage['Sales'], trend='add', seasonal='add', seasonal_periods=12)
es_fit = es_model.fit()
df_vintage['ES_Forecast'] = es_fit.fittedvalues
In [30]:
plt.figure(figsize=(8, 4))
plt.plot(df_vintage['Sales'], label='Original Sales', marker='o')
plt.plot(df_vintage['MA_3'], label='3-Month Moving Average', linestyle='--')
plt.plot(df_vintage['ES_Forecast'], label='Exponential Smoothing', linestyle='-.')
plt.title('Sales Forecasting with Moving Average and Exponential Smoothing')
plt.xlabel('Month')
plt.ylabel('Sales')
plt.legend()
plt.show()
[Figure: Sales with 3-Month Moving Average and Exponential Smoothing]

ARIMA: Mathematical Foundation¶

ARIMA (AutoRegressive Integrated Moving Average) is a statistical model used to forecast non-stationary time series by combining three components:

  1. Autoregressive (AR) part – order p
    • The current value of the series depends on its past values.

    • Mathematically:

      $y_t = c + \phi_1 y_{t-1} + \phi_2 y_{t-2} + \cdots + \phi_p y_{t-p} + \varepsilon_t$

    • where:

      • $\phi_i$ = autoregressive coefficients,
      • $\varepsilon_t$ = white noise error term.
  2. Integrated (I) part – order d (see the differencing sketch after this list)
    • Represents the number of differencing operations applied to remove trends and make the series stationary.
    • First differencing: $ y'_t = y_t - y_{t-1} $
    • After (d) differences, the transformed series becomes stationary.
  3. Moving Average (MA) part – order q
    • The current value depends on past forecast errors.
    • Mathematically: $ y_t = c + \theta_1 \varepsilon_{t-1} + \theta_2 \varepsilon_{t-2} + \cdots + \theta_q \varepsilon_{t-q} + \varepsilon_t $
    • where:
      • $\theta_j$ = moving average coefficients,
      • $\varepsilon_t$ = white noise error term.
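
To see the "I" component in practice, a short sketch (assuming the df_vintage DataFrame loaded earlier) applies first differencing with pandas:

# First differencing removes the level/trend: y'_t = y_t - y_{t-1}
sales_diff = df_vintage['Sales'].diff().dropna()
print(sales_diff.head())  # this differenced series is what ARIMA models when d = 1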

ARIMA(p, d, q) General Model¶

After differencing the series (d) times to achieve stationarity, the ARIMA model combines AR(p) and MA(q):

$ \Delta^d y_t = c + \phi_1 \Delta^d y_{t-1} + \cdots + \phi_p \Delta^d y_{t-p} + \theta_1 \varepsilon_{t-1} + \cdots + \theta_q \varepsilon_{t-q} + \varepsilon_t $

where:
  • $\Delta^d y_t$: the series differenced (d) times,
  • $\phi_i$: AR coefficients,
  • $\theta_j$: MA coefficients,
  • $\varepsilon_t$: white noise.

Key Points for Forecasting Sales¶

  • Use differencing $(d)$ to remove long-term trends.
  • AR terms $(p)$ capture momentum from past sales.
  • MA terms $(q)$ capture the effect of shocks and random fluctuations.
  • Proper selection of $(p,d,q)$ is critical for accurate forecasts.

Let's start with a simple ARIMA(1, 1, 1), adding a seasonal (1, 1, 1, 12) component to capture the monthly pattern, as an example.

In a real-world scenario, you would perform model identification (for example, inspecting ACF/PACF plots or comparing information criteria) before settling on the orders; a small AIC-based sketch follows.
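
One common identification approach, sketched below under the assumption that a lower AIC signals a better trade-off between fit and complexity, is a small grid search over candidate (p, d, q) orders:

from itertools import product
from statsmodels.tsa.arima.model import ARIMA
import warnings
warnings.filterwarnings("ignore")

# Hedged sketch: compare a small grid of non-seasonal (p, d, q) orders by AIC
results = []
for p, d, q in product(range(3), range(2), range(3)):
    try:
        fit = ARIMA(df_vintage['Sales'], order=(p, d, q)).fit()
        results.append(((p, d, q), fit.aic))
    except Exception:
        continue  # skip orders that fail to estimate

best_order, best_aic = min(results, key=lambda item: item[1])
print(f"Lowest-AIC order: {best_order} (AIC = {best_aic:.1f})")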

In [31]:
from statsmodels.tsa.arima.model import ARIMA
import warnings
warnings.filterwarnings("ignore")
model_arima = ARIMA(df_vintage['Sales'], order=(1, 1, 1), seasonal_order=(1, 1, 1, 12)) 
# Added seasonal order based on identified seasonality
model_arima_fit = model_arima.fit()
In [32]:
# Forecast for the next 12 months
forecast_periods = 12
forecast_arima = model_arima_fit.forecast(steps=forecast_periods)

# Create a DataFrame for the forecast
forecast_index = range(df_vintage['Month'].max() + 1, df_vintage['Month'].max() + 1 + forecast_periods)
forecast_df = pd.DataFrame({'Month': forecast_index, 'Sales_Forecast_ARIMA': forecast_arima})

print("ARIMA Forecast for the next 12 months:")
display(forecast_df)
ARIMA Forecast for the next 12 months:
    Month  Sales_Forecast_ARIMA
36     37            284.619781
37     38            265.305505
38     39            270.512378
39     40            214.573704
40     41            218.213499
41     42            172.243247
42     43            178.575979
43     44            184.907693
44     45            141.908406
45     46            158.627769
46     47            186.374581
47     48            246.884119
In [33]:
# Plot the original data and the forecast
plt.figure(figsize=(8, 4))
plt.plot(df_vintage['Month'], df_vintage['Sales'], label='Original Sales')
plt.plot(forecast_df['Month'], forecast_df['Sales_Forecast_ARIMA'], label='ARIMA Forecast', color='red', linestyle='--')
plt.title('Vintage Restaurant Sales Forecast (ARIMA)')
plt.xlabel('Month')
plt.ylabel('Sales ($1000s)')
plt.legend()
plt.grid(True)
plt.show()
[Figure: Vintage Restaurant Sales Forecast (ARIMA)]
In [34]:
# Get confidence intervals
forecast_ci = model_arima_fit.get_forecast(steps=forecast_periods).conf_int()

# Add confidence intervals to the forecast DataFrame
forecast_df['Lower_CI'] = forecast_ci.iloc[:, 0].values
forecast_df['Upper_CI'] = forecast_ci.iloc[:, 1].values

print("\nARIMA Forecast with Confidence Intervals:")
display(forecast_df)
ARIMA Forecast with Confidence Intervals:
    Month  Sales_Forecast_ARIMA    Lower_CI    Upper_CI
36     37            284.619781  273.838851  295.400711
37     38            265.305505  254.334222  276.276787
38     39            270.512378  258.517612  282.507144
39     40            214.573704  201.951636  227.195772
40     41            218.213499  204.898500  231.528498
41     42            172.243247  158.300834  186.185659
42     43            178.575979  164.023032  193.128925
43     44            184.907693  169.772662  200.042723
44     45            141.908406  126.210019  157.606793
45     46            158.627769  142.391206  174.864331
46     47            186.374581  169.602646  203.146516
47     48            246.884119  229.643446  264.124791
In [35]:
# Plot the original data, forecast, and confidence intervals
plt.figure(figsize=(10, 5))
plt.plot(df_vintage['Month'], df_vintage['Sales'], label='Original Sales')
plt.plot(forecast_df['Month'], forecast_df['Sales_Forecast_ARIMA'], label='ARIMA Forecast', color='red', linestyle='--')
plt.fill_between(forecast_df['Month'], forecast_df['Lower_CI'], forecast_df['Upper_CI'], color='red', alpha=0.1, label='Confidence Interval')
plt.title('Vintage Restaurant Sales Forecast with Confidence Intervals (ARIMA)')
plt.xlabel('Month')
plt.ylabel('Sales ($1000s)')
plt.legend()
plt.grid(True)
plt.show()
[Figure: Vintage Restaurant Sales Forecast with Confidence Intervals (ARIMA)]

Key Takeaways¶

  • Time series patterns (trend, seasonality, noise) are key for understanding sales data.
  • Moving averages and exponential smoothing are simple yet effective forecasting tools.
  • ARIMA models provide more flexibility for non-stationary series.
  • Forecasting sales and demand helps SMEs plan inventory, marketing, and production.

Module 4: Dashboards for Business Reporting¶

Introduction to Dashboards¶

Dashboards are interactive tools that consolidate data, metrics, and analytical results into a single interface. They help businesses:

  • Monitor performance in real time.
  • Communicate insights effectively.
  • Make data-driven decisions.

Key Features of Effective Dashboards¶

  • Clarity: Easy to interpret, minimal clutter.
  • Interactivity: Users can filter, drill-down, or explore data.
  • Integration: Can include results from models (e.g., regression, clustering, forecasts).

Setting Up the Environment¶

We will use Python libraries:

  • pandas for data manipulation
  • plotly.express for visualizations
  • dash for building the dashboard

Install libraries if needed:

!pip install pandas plotly dash
In [36]:
# Import libraries
import pandas as pd
import plotly.express as px
import plotly.io as pio
pio.renderers.default = "notebook"   # or "notebook_connected"
from dash import Dash, dcc, html, Input, Output

Let's work through a simple example.

In [37]:
# Sample dataset: sales data for an SME
data = {
    'Month': ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun'],
    'Product_A_Sales': [120, 150, 170, 130, 160, 180],
    'Product_B_Sales': [80, 90, 100, 85, 95, 105],
    'Revenue': [2000, 2300, 2500, 2100, 2400, 2700]
}

df = pd.DataFrame(data)
df
Out[37]:
  Month  Product_A_Sales  Product_B_Sales  Revenue
0   Jan              120               80     2000
1   Feb              150               90     2300
2   Mar              170              100     2500
3   Apr              130               85     2100
4   May              160               95     2400
5   Jun              180              105     2700

Exploratory Visualizations¶

Before building the dashboard, let's visualize sales and revenue trends.

In [38]:
# Line chart for sales
fig_sales = px.line(df, x='Month', y=['Product_A_Sales', 'Product_B_Sales'],
                    title='Monthly Sales for Products A and B')
fig_sales.show()
In [39]:
# Bar chart for revenue
fig_revenue = px.bar(df, x='Month', y='Revenue', title='Monthly Revenue')
fig_revenue.show()

Building an Interactive Dashboard¶

We'll create a simple dashboard with:

  • Dropdown to select products
  • Line chart for sales
  • Bar chart for revenue
In [40]:
# Initialize the Dash app
app = Dash(__name__)

# Layout
app.layout = html.Div([
    html.H1("SME Sales Dashboard", style={'text-align': 'center'}),

    html.Label("Select Product:"),
    dcc.Dropdown(
        id='product_dropdown',
        options=[
            {'label': 'Product A', 'value': 'Product_A_Sales'},
            {'label': 'Product B', 'value': 'Product_B_Sales'}
        ],
        value='Product_A_Sales'
    ),

    dcc.Graph(id='sales_graph', figure={}),
    dcc.Graph(id='revenue_graph', figure={})
])

# Callbacks
@app.callback(
    Output('sales_graph', 'figure'),
    Input('product_dropdown', 'value')
)
def update_sales(selected_product):
    fig = px.line(df, x='Month', y=selected_product, title=f'Monthly Sales of {selected_product}')
    return fig

@app.callback(
    Output('revenue_graph', 'figure'),
    Input('product_dropdown', 'value')
)
def update_revenue(_):
    fig = px.bar(df, x='Month', y='Revenue', title='Monthly Revenue')
    return fig

# Run the app (requires Dash >= 2.11 for inline rendering inside a notebook)
if __name__ == '__main__':
    app.run(jupyter_mode='inline')

In practice, the best way is to run the app as a standalone script from the shell (Bash) and open the local URL it prints in your browser. Let's go!

Practical Applications¶

  • Integrating Forecast Models: You can extend this dashboard to display predictions from regression or ARIMA models.
  • KPIs: Add cards for metrics like total sales, average revenue, or growth rates (a short pandas sketch follows this list).
  • Interactive Filtering: Include filters by region, product category, or time period.
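
As a hedged sketch of the KPI idea (using only the small df defined above), the numbers behind such cards can be computed directly with pandas:

# Simple KPIs that could feed dashboard "cards"
total_sales_a = df['Product_A_Sales'].sum()
total_sales_b = df['Product_B_Sales'].sum()
avg_revenue = df['Revenue'].mean()
mom_growth = df['Revenue'].pct_change().mean() * 100   # average month-over-month growth

print(f"Total sales, Product A: {total_sales_a}")
print(f"Total sales, Product B: {total_sales_b}")
print(f"Average monthly revenue: {avg_revenue:.0f}")
print(f"Average MoM revenue growth: {mom_growth:.1f}%")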

Take Away¶

We learned how to:

  1. Understand the importance of dashboards in business reporting.
  2. Explore sales and revenue data with interactive visualizations.
  3. Build a lightweight dashboard using Dash and Plotly.
  4. Integrate interactivity to enhance business decision-making.

Conclusion¶

Throughout this program, you have gained not only the technical knowledge of Python’s open-source analytics tools but also the strategic insights to apply them effectively within a business context. You are now equipped to:

  • Optimize performance and manage risk with data-driven decision-making.

  • Leverage Python’s accessible ecosystem to give SMEs the same analytical edge as larger competitors.

  • Implement growth-oriented analytics strategies that translate directly into smarter actions and tangible results.

By the end of this journey, you should feel confident in applying Python analytics to your own organization—empowering you to anticipate trends, uncover opportunities, and make decisions that drive sustainable growth.