@@ -31,11 +31,6 @@ Loading the data into Julia session.
3131
3232``` {julia}
3333df = CSV.read(joinpath(data_path, "raw/cal_housing.csv"), DataFrame)
34- # # Features:
35- # X = Matrix(df[:,Not(:target)])
36- # dt = StatsBase.fit(ZScoreTransform, X, dims=1)
37- # StatsBase.transform!(dt, X)
38- # df = DataFrame(X,:auto)
3934# Target:
4035y = df.target
4136y = Float64.(y .>= median(y)); # binary target (positive outcome)
@@ -55,10 +50,19 @@ All features are continuous:
5550schema(df_balanced)
5651```
5752
53+ Feature transformation:
54+
55+ ``` {julia}
56+ transformer = Standardizer(count=true)
57+ mach = MLJBase.fit!(machine(transformer, df_balanced[:,Not(:target)]))
58+ X = MLJBase.transform(mach, df_balanced[:,Not(:target)])
59+ schema(X)
60+ ```
61+
5862Turning the data into `CounterfactualData`:
5963
6064``` {julia}
61- X = Matrix(df_balanced[:,Not(:target)] )
65+ X = Matrix(X )
6266X = permutedims(X)
6367y = permutedims(df_balanced.target)
6468data = CounterfactualData(X,y)
@@ -83,12 +87,6 @@ rename!(df, :SeriousDlqin2yrs => :target)
8387mapcols!(x -> [ifelse(x_=="NA", missing, x_) for x_ in x], df)
8488dropmissing!(df)
8589mapcols!(x -> eltype(x) <: AbstractString ? parse.(Int, x) : x, df)
86- # # Features:
87- # X = Matrix(df[:,Not(:target)])
88- # dt = StatsBase.fit(ZScoreTransform, X, dims=1)
89- # StatsBase.transform!(dt, X)
90- # df = DataFrame(X,:auto)
91- # Target:
9290df.target .= map(y -> y == 0 ? 1 : 0, df.target) # positive outcome = no delinquency
9391```
9492
@@ -104,10 +102,19 @@ All features are continuous:
104102schema(df_balanced)
105103```
106104
105+ Feature transformation:
106+
107+ ``` {julia}
108+ transformer = Standardizer(count=true)
109+ mach = MLJBase.fit!(machine(transformer, df_balanced[:,Not(:target)]))
110+ X = MLJBase.transform(mach, df_balanced[:,Not(:target)])
111+ schema(X)
112+ ```
113+
107114Turning the data into `CounterfactualData`:
108115
109116``` {julia}
110- X = Matrix(df_balanced[:,Not(:target)] )
117+ X = Matrix(X )
111118X = permutedims(X)
112119y = permutedims(df_balanced.target)
113120data = CounterfactualData(X,y)
@@ -148,13 +155,13 @@ df_balanced = getobs(undersample(df, df.target;shuffle=true))[1]
148155schema(df_balanced)
149156```
150157
151- One-hot encoding :
158+ Feature transformation:
152159
153160``` {julia}
154- hot = OneHotEncoder ()
155- mach = MLJBase.fit!(machine(hot , df_balanced))
156- df_balanced = MLJBase.transform(mach, df_balanced)
157- schema(df_balanced )
161+ transformer = Standardizer(count=true) |> ContinuousEncoder ()
162+ mach = MLJBase.fit!(machine(transformer , df_balanced[:,Not(:target)] ))
163+ X = MLJBase.transform(mach, df_balanced[:,Not(:target)] )
164+ schema(X )
158165```
159166
160167Categorical indices:
@@ -170,7 +177,7 @@ features_categorical = [
170177Preparing for use with `CounterfactualExplanations.jl`:
171178
172179``` {julia}
173- X = Matrix(df_balanced[:,Not(:target)] )
180+ X = Matrix(X )
174181X = permutedims(X)
175182y = permutedims(df_balanced.target)
176183data = CounterfactualData(
0 commit comments