-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathKNeighbour.py
More file actions
105 lines (88 loc) · 2.94 KB
/
KNeighbour.py
File metadata and controls
105 lines (88 loc) · 2.94 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix,classification_report,ConfusionMatrixDisplay
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import plot_tree
# --- Data loading and feature preparation ------------------------------------
# Workbook and worksheet that hold the rows used by this page.
file = 'KFC.xlsx'
sheet = 'Clean_data'


@st.cache_data
def _read_sheet(path, sheet_name):
    """Return one worksheet of an Excel workbook as a DataFrame.

    Streamlit re-executes this script on every widget interaction; caching
    the parsed sheet avoids re-reading the workbook each time.  The cache is
    keyed on the arguments, so edits to the file on disk are only picked up
    after the cache is cleared or the app is restarted.
    """
    return pd.read_excel(path, sheet_name=sheet_name)


# st.cache_data returns a fresh copy on each call, so the column assignments
# below never modify the cached frame.
df = _read_sheet(file, sheet)

st.header('K-Neighbour Classification')
st.write('Occupation prediction')

# Integer-encode the two text columns.  The same encoder object is refit for
# the second column, so after these two lines `le` only remembers the
# orderMethod classes.
le = LabelEncoder()
df['orderType_enc'] = le.fit_transform(df['orderType'].astype(str))
df['orderMethod_enc'] = le.fit_transform(df['orderMethod'].astype(str))

# age_enc and budget_enc are read from the sheet as-is; they are not created
# in this script.
df_x = df[["age_enc", "budget_enc", "orderType_enc", "orderMethod_enc"]]
df_y = df[["occupation"]]

# Standardise every feature column over the whole dataset.
scaler = StandardScaler()
df_x_scaled = scaler.fit_transform(df_x)

feature_list = df_x.columns
class_list = np.sort(df_y["occupation"].unique())
# --- Model setting and dataset overview --------------------------------------
# Candidate neighbour counts 1..15; index 2 makes K = 3 the initial choice.
k_choices = list(range(1, 16))
k_sel = st.selectbox("Select K", k_choices, index=2)

# Overall row count, followed by one metric per occupation class.
st.metric(label="Total Count", value=len(df_y))
occupation_column = df_y["occupation"]
for occupation_name in class_list:
    rows_in_class = int((occupation_column == occupation_name).sum())
    st.metric(label=occupation_name, value=rows_in_class)

# One box plot per feature, grouped and coloured by occupation.
for feature_name in feature_list:
    fig_boxplot = px.box(
        df,
        x="occupation",
        y=feature_name,
        color="occupation",
    )
    st.plotly_chart(fig_boxplot)
# --- Train / evaluate ---------------------------------------------------------
# Bounded so train_test_split always receives a fraction strictly between
# 0 and 1; an unbounded input let the user enter 0 or >= 1 and crash the page.
test_ratio = st.number_input(
    "Select Ratio for Test Set",
    min_value=0.05,
    max_value=0.95,
    value=0.2,
    step=0.05,
)

# Split the unscaled features first, then fit the scaler on the training rows
# only, so the test rows do not influence the mean/std used for scaling.
# `scaler` keeps these training statistics for any later transform() call.
# The target is passed as a 1-D Series, which is the shape the classifier
# expects.
x_train_raw, x_test_raw, y_train, y_test = train_test_split(
    df_x, df_y["occupation"], test_size=test_ratio, random_state=99
)
x_train = scaler.fit_transform(x_train_raw)
x_test = scaler.transform(x_test_raw)

# A neighbour query cannot ask for more neighbours than there are fitted
# samples, so cap K at the training-set size.
n_neighbors = min(k_sel, len(x_train))
if n_neighbors < k_sel:
    st.warning(
        f"K reduced to {n_neighbors}: only {len(x_train)} training rows available."
    )

knn = KNeighborsClassifier(n_neighbors=n_neighbors)
knn.fit(x_train, y_train)
y_pred = knn.predict(x_test)
st.write(f"Test Accuracy:{accuracy_score(y_test,y_pred):.2f}")
# --- Inputs for a single prediction -------------------------------------------
# Code -> display label for each feature.
# NOTE(review): the order-type and order-method codes presumably mirror the
# alphabetical codes LabelEncoder assigns to the data, and the age/budget codes
# presumably mirror the sheet's age_enc/budget_enc columns - confirm against
# the workbook.
ORDER_TYPE_LABELS = {
    0: 'group',
    1: 'individual',
    2: 'promotion',
    3: 'snack_sharing',
}
ORDER_METHOD_LABELS = {
    0: 'app',
    1: 'counter',
    2: 'kiosk',
}
AGE_LABELS = {
    1: 'under 18',
    2: '18-22',
    3: '23-27',
    4: '28-35',
    5: 'above 35',
}
BUDGET_LABELS = {
    1: 'Below 100 Baht',
    2: '100 - 199 Baht',
    3: '200 - 299 Baht',
    4: '300+ Baht',
}


def _coded_selectbox(title, code_labels):
    """Show a select box listing the labels and return the chosen numeric code."""
    return st.selectbox(
        title,
        options=list(code_labels),
        format_func=code_labels.__getitem__,
    )


# Widgets are created in the same order as before, so the page layout is
# unchanged.
orderType_enc = _coded_selectbox("Order Type", ORDER_TYPE_LABELS)
orderMethod_enc = _coded_selectbox("Order Method", ORDER_METHOD_LABELS)
age_enc = _coded_selectbox("Age", AGE_LABELS)
budget_enc = _coded_selectbox("Budget", BUDGET_LABELS)
# --- Predict for the selected inputs and report -------------------------------
# Single-row frame whose columns follow the training feature order.
selected_features = {
    "age_enc": [age_enc],
    "budget_enc": [budget_enc],
    "orderType_enc": [orderType_enc],
    "orderMethod_enc": [orderMethod_enc],
}
predict_data = pd.DataFrame(selected_features)

# Apply the already-fitted scaler before querying the classifier.
scaled_row = scaler.transform(predict_data)
predicted_labels = knn.predict(scaled_row)
val = predicted_labels[0]

# Unrounded accuracy on the held-out split.
accuracy = accuracy_score(y_test, y_pred)

st.write("Predicted Value:", val)
st.write("Test Accuracy", accuracy)