# python-machine-learning/KNeighbour.py at main · KoVoidG/python-machine-learning · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px

from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix,classification_report,ConfusionMatrixDisplay
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import plot_tree

# Workbook path and worksheet name handed to pandas.
file, sheet = 'KFC.xlsx', 'Clean_data'
df = pd.read_excel(io=file, sheet_name=sheet)

# Page heading followed by a one-line description of the task.
st.header('K-Neighbour Classification')
st.write('Occupation prediction')

# A single encoder instance is refit for every column, so once the loop
# finishes `le` only holds the classes of the last column ("orderMethod").
le = LabelEncoder()
for source_col in ("orderType", "orderMethod"):
    df[source_col + "_enc"] = le.fit_transform(df[source_col].astype(str))

# Feature columns fed to the classifier, and the target kept as a
# one-column DataFrame.
feature_cols = ["age_enc", "budget_enc", "orderType_enc", "orderMethod_enc"]
df_x = df[feature_cols]
df_y = df[["occupation"]]

# Standardise the features; the fitted scaler is reused later for the
# single-row prediction.
scaler = StandardScaler().fit(df_x)
df_x_scaled = scaler.transform(df_x)

# Names iterated over when drawing per-feature plots and per-class counts.
feature_list = df_x.columns
class_list = np.sort(df_y["occupation"].unique())

# Neighbour count for the classifier: choices 1..15, preselecting the third
# entry (K = 3).
k_choices = list(range(1, 16))
k_sel = st.selectbox("Select K", options=k_choices, index=2)

# Overall row count, then one metric per occupation class.
occupation_col = df_y["occupation"]
st.metric(label="Total Count", value=len(df_y))
for occupation_name in class_list:
    # Summing the boolean mask counts the rows belonging to this class.
    rows_in_class = int((occupation_col == occupation_name).sum())
    st.metric(label=occupation_name, value=rows_in_class)

# One box plot per feature, grouped and coloured by occupation.
for feature_name in feature_list:
    st.plotly_chart(
        px.box(df, x="occupation", y=feature_name, color="occupation")
    )

# Fraction of rows held out for evaluation. The bounds keep the value
# strictly inside (0, 1); train_test_split rejects floats outside that range.
test_ratio = st.number_input(
    "Select Ratio for Test Set",
    min_value=0.05,
    max_value=0.95,
    value=0.2,
)

# Split the *unscaled* features first so the scaler below never sees the
# held-out rows. With the same row count and random_state the row assignment
# is the same as splitting the pre-scaled array.
x_train_raw, x_test_raw, y_train, y_test = train_test_split(
    df_x, df_y, test_size=test_ratio, random_state=99
)

# Refit the scaler on the training rows only (avoids test-set leakage into
# the mean/variance). `scaler` is rebound on purpose: the single-row
# prediction further down must use the same statistics the model was
# trained with.
scaler = StandardScaler().fit(x_train_raw)
x_train = scaler.transform(x_train_raw)
x_test = scaler.transform(x_test_raw)

knn = KNeighborsClassifier(n_neighbors=k_sel)
# y_train is a one-column DataFrame; flatten it to the 1-D target that
# scikit-learn classifiers expect.
knn.fit(x_train, np.ravel(y_train))

y_pred = knn.predict(x_test)
st.write(f"Test Accuracy:{accuracy_score(y_test,y_pred):.2f}")

def _select_encoded_category(widget_label, source_column):
    """Render a select box for a label-encoded column and return the chosen code.

    The choices are derived by fitting a fresh ``LabelEncoder`` on
    ``df[source_column].astype(str)`` -- the same transformation used to build
    the ``*_enc`` training columns -- so integer code ``i`` is always displayed
    with the category the encoder assigned to ``i``, whatever categories the
    workbook actually contains.

    Args:
        widget_label: Text shown above the select box.
        source_column: Name of the raw (un-encoded) column in ``df``.

    Returns:
        The integer code of the selected category.
    """
    categories = LabelEncoder().fit(df[source_column].astype(str)).classes_
    return st.selectbox(
        widget_label,
        options=list(range(len(categories))),
        format_func=lambda code: str(categories[code]),
    )


orderType_enc = _select_encoded_category("Order Type", "orderType")
orderMethod_enc = _select_encoded_category("Order Method", "orderMethod")

# `age_enc` and `budget_enc` are read from the workbook already encoded, so
# their code -> label tables cannot be derived from anything in this script
# and stay hard-coded. NOTE(review): confirm these match the workbook.
_AGE_LABELS = {
    1: 'under 18',
    2: '18-22',
    3: '23-27',
    4: '28-35',
    5: 'above 35',
}
_BUDGET_LABELS = {
    1: 'Below 100 Baht',
    2: '100 - 199 Baht',
    3: '200 - 299 Baht',
    4: '300+ Baht',
}

age_enc = st.selectbox(
    "Age",
    options=list(_AGE_LABELS),
    format_func=lambda code: _AGE_LABELS[code],
)
budget_enc = st.selectbox(
    "Budget",
    options=list(_BUDGET_LABELS),
    format_func=lambda code: _BUDGET_LABELS[code],
)

# Single-row frame built from the widget selections, with the columns in the
# same order as the training features.
predict_data = pd.DataFrame(
    {
        "age_enc": [age_enc],
        "budget_enc": [budget_enc],
        "orderType_enc": [orderType_enc],
        "orderMethod_enc": [orderMethod_enc],
    }
)

# Scale the row with the fitted scaler, then take the only prediction.
scaled_row = scaler.transform(predict_data)
val = knn.predict(scaled_row)[0]

# Unrounded accuracy on the held-out split.
accuracy = accuracy_score(y_test, y_pred)

st.write("Predicted Value:", val)
st.write("Test Accuracy", accuracy)