@@ -48,18 +48,17 @@ using MLUtils: undersample
4848# Make DataFrames.jl work
4949MLUtils.getobs(data::DataFrame, i) = data[i,:]
5050MLUtils.numobs(data::DataFrame) = nrow(data)
51- df_balanced = getobs(undersample(df, df.target;shuffle=true))
51+ df_balanced = getobs(undersample(df, df.target;shuffle=true))[1]
5252```
5353
5454``` {julia}
55- CSV.write(joinpath(data_path, "cal_housing.csv"), df )
55+ CSV.write(joinpath(data_path, "cal_housing.csv"), df_balanced )
5656```
5757
5858
5959## Give Me Some Credit
6060
6161``` {julia}
62- using CSV, DataFrames, Statistics, StatsBase
6362df = CSV.read(joinpath(data_path, "raw/cs-training.csv"), DataFrame)
6463select!(df, Not([:Column1]))
6564rename!(df, :SeriousDlqin2yrs => :target)
@@ -68,7 +67,7 @@ dropmissing!(df)
6867mapcols!(x -> eltype(x) <: AbstractString ? parse.(Int, x) : x, df)
6968# Features:
7069X = Matrix(df[:,Not(:target)])
71- dt = fit(ZScoreTransform, X, dims=1)
70+ dt = StatsBase. fit(ZScoreTransform, X, dims=1)
7271StatsBase.transform!(dt, X)
7372# Target:
7473y = df.target
@@ -83,7 +82,7 @@ using MLUtils: undersample
8382# Make DataFrames.jl work
8483MLUtils.getobs(data::DataFrame, i) = data[i,:]
8584MLUtils.numobs(data::DataFrame) = nrow(data)
86- df_balanced = getobs(undersample(df, df.target;shuffle=true))
85+ df_balanced = getobs(undersample(df, df.target;shuffle=true))[1]
8786```
8887
8988``` {julia}
@@ -93,30 +92,21 @@ CSV.write(joinpath(data_path, "gmsc.csv"), df_balanced)
9392## UCI Credit Card Default
9493
9594``` {julia}
96- using CSV, DataFrames, Statistics, StatsBase
9795df = CSV.read(joinpath(data_path, "raw/UCI_Credit_Card.csv"), DataFrame)
98- select!(df, Not([:ID, :SEX, :EDUCATION, :MARRIAGE ]))
96+ select!(df, Not([:ID]))
9997rename!(df, "default.payment.next.month" => :target)
10098dropmissing!(df)
99+ df.SEX = categorical(df.SEX)
100+ df.EDUCATION = categorical(df.EDUCATION)
101+ df.MARRIAGE = categorical(df.MARRIAGE)
101102mapcols!(x -> eltype(x) <: AbstractString ? parse.(Int, x) : x, df)
102- # Features:
103- X = Matrix(df[:,Not(:target)])
104- dt = fit(ZScoreTransform, X, dims=1)
105- StatsBase.transform!(dt, X)
106- # Target:
107- y = df.target
108- # Data:
109- df = DataFrame(X,:auto)
110- df.target = y
111103```
112104
113105``` {julia}
114- using MLUtils
115- using MLUtils: undersample
116106# Make DataFrames.jl work
117107MLUtils.getobs(data::DataFrame, i) = data[i,:]
118108MLUtils.numobs(data::DataFrame) = nrow(data)
119- df_balanced = getobs(undersample(df, df.target;shuffle=true))
109+ df_balanced = getobs(undersample(df, df.target;shuffle=true))[1]
120110```
121111
122112``` {julia}
0 commit comments