JuliaTrustworthyAI
diff --git a/‎.github/workflows/CI.yml‎
Lines changed: 0 additions & 1 deletion b/‎.github/workflows/CI.yml‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎Project.toml‎
Lines changed: 7 additions & 7 deletions b/‎Project.toml‎
Lines changed: 7 additions & 7 deletions
diff --git a/‎README.qmd‎
Lines changed: 15 additions & 27 deletions b/‎README.qmd‎
Lines changed: 15 additions & 27 deletions
diff --git a/‎_quarto.yml‎
Lines changed: 13 additions & 2 deletions b/‎_quarto.yml‎
Lines changed: 13 additions & 2 deletions
diff --git a/‎docs/src/_intro.qmd‎
Lines changed: 24 additions & 0 deletions b/‎docs/src/_intro.qmd‎
Lines changed: 24 additions & 0 deletions
diff --git a/‎docs/src/_metadata.yml‎
Lines changed: 4 additions & 0 deletions b/‎docs/src/_metadata.yml‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎docs/src/index.qmd‎
Lines changed: 9 additions & 0 deletions b/‎docs/src/index.qmd‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎docs/src/paper/appendix.qmd‎
Lines changed: 29 additions & 0 deletions b/‎docs/src/paper/appendix.qmd‎
Lines changed: 29 additions & 0 deletions
diff --git a/‎docs/src/paper/data_preprocessing/real_world_data.qmd‎
Lines changed: 132 additions & 0 deletions b/‎docs/src/paper/data_preprocessing/real_world_data.qmd‎
Lines changed: 132 additions & 0 deletions
diff --git a/‎docs/src/paper/data_preprocessing/synthetic_data.qmd‎
Lines changed: 50 additions & 0 deletions b/‎docs/src/paper/data_preprocessing/synthetic_data.qmd‎
Lines changed: 50 additions & 0 deletions
@@ -3,7 +3,6 @@ on:
   push:
     branches:
       - main
-      - original-paper
     tags: '*'
   pull_request:
 concurrency:
 
@@ -1,6 +1,6 @@
 name = "AlgorithmicRecourseDynamics"
 uuid = "3d1ede72-abb8-4340-bf8e-2ae06849b5ec"
-authors = ["Anonymous"]
+authors = ["Patrick Altmeyer"]
 version = "0.1.0"
 
 [deps]
@@ -14,7 +14,7 @@ KernelFunctions = "ec8451be-7e33-11e9-00cf-bbf324bd1392"
 LazyArtifacts = "4af54fe1-eca0-43a8-85a7-787d91b784e3"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
-MLJ = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7"
+MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d"
 MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54"
 Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a"
 Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
@@ -26,19 +26,19 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 
 [compat]
+CounterfactualExplanations = "0.1.4"
 CSV = "0.10"
 DataFrames = "1"
 Distances = "0.10"
 Flux = "0.13"
 Images = "0.25"
 KernelFunctions = "0.10"
-LaplaceRedux = "0.1"
-MLJ = "0.18, 0.19"
-MLUtils = "0.2, 0.3"
+MLJBase = "0.21.3"
+MLUtils = "0.3.1"
 Parameters = "0.12"
-Plots = "1"
+Plots = "1.37.2"
 ProgressMeter = "1"
-RCall = "0.13"
+RCall = "0.13.14"
 StatsBase = "0.33"
 julia = "1.6, 1.7, 1.8"
 
 
@@ -1,39 +1,27 @@
 ---
 format: 
-  gfm:
+  commonmark:
+    variant: -raw_html
     wrap: none
-    html-math-method: webtex
+execute: 
+  freeze: auto
+  echo: true
+  eval: true
+  output: false
+crossref:
+  fig-prefix: Figure
+  tbl-prefix: Table
+bibliography: bib.bib
+jupyter: julia-1.8
 ---
 
 [![Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://pat-alt.github.io/CounterfactualExplanations.jl/stable)
 [![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://pat-alt.github.io/CounterfactualExplanations.jl/dev)
 [![Build Status](https://github.com/pat-alt/CounterfactualExplanations.jl/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/pat-alt/CounterfactualExplanations.jl/actions/workflows/CI.yml?query=branch%3Amain) 
 [![Coverage](https://codecov.io/gh/pat-alt/CounterfactualExplanations.jl/branch/main/graph/badge.svg)](https://codecov.io/gh/pat-alt/CounterfactualExplanations.jl)
+[![Code Style: Blue](https://img.shields.io/badge/code%20style-blue-4495d1.svg)](https://github.com/invenia/BlueStyle)
+[![ColPrac: Contributor's Guide on Collaborative Practices for Community Packages](https://img.shields.io/badge/ColPrac-Contributor's%20Guide-blueviolet)](https://github.com/SciML/ColPrac)
+[![Twitter Badge](https://img.shields.io/twitter/url/https/twitter.com/paltmey.svg?style=social&label=Follow%20%40paltmey)](https://twitter.com/paltmey)
 
 # AlgorithmicRecourseDynamics
 
-`AlgorithmicRecourseDynamics.jl` is a Julia package for modelling Algorithmic Recourse Dynamics.
-
-## Research Paper 📝
-
-**Note** ⚠: You are browsing the (anonymised) [`#original-paper`](https://anonymous.4open.science/r/AlgorithmicRecourseDynamics/README.md) branch of `AlgorithmicRecourseDynamics.jl`. This branch is a static artifact corresponding to the state of the package at the time the paper was first published. It can be used to replicate the original findings of the paper. Only this branch is currently accessible as an anonymised git repository. The main repository is private and will will be open-sourced after the review process.
-
-## At a Glance
-
-The paper titles **Endogenous Macrodynamics in Algorithmic Recourse** is currently under review and not yet published. You can find
-a preprint along with other resources right here on this branch of the
-repository:
-
-- [Paper](paper/paper.pdf)
-- [Notebooks](dev/notebooks/)
-- [Supplementary Appendix](build/dev/notebooks/appendix.html) generated from notebooks (download the HTML and view in browser)
-- [Artifacts]() (including data and experimental results; link currently exluded due to double-blind review process) 
-
-In this work we investigate what happens if Algorithmic Recourse is actually implemented by a large number of individuals. The chart below illustrates what we mean by Endogenous Macrodynamics in Algorithmic Recourse: (a) we have a simple linear classifier trained for binary classification where samples from the negative class (y=0) are marked in blue and samples of the positive class (y=1) are marked in orange; (b) the implementation of AR for a random subset of individuals leads to a noticable domain shift; (c) as the classifier is retrained we observe a corresponding model shift; (d) as this process is repeated, the decision boundary moves away from the target class.
-
-![](paper/www/poc.png)
-
-## Paper Abstract
-
-Existing work on Counterfactual Explanations (CE) and Algorithmic Recourse (AR) has largely been limited to the static setting and focused on single individuals: given some estimated model, the goal is to find valid counterfactuals for an individual instance that fulfill various desiderata. The ability of such counterfactuals to handle dynamics like data and model drift remains a largely unexplored research challenge at this point. There has also been surprisingly little work on the related question of how the actual implementation of recourse by one individual may affect other individuals. Through this work we aim to close that gap by systematizing and extending existing knowledge. We first show that many of the existing methodologies can be collectively described by a generalized framework. We then argue that the existing framework fails to account for a hidden external cost of recourse, that only reveals itself when studying the endogenous dynamics of recourse at the group level. Through simulation experiments involving various state-of-the-art counterfactual generators and several benchmark datasets, we generate large numbers of counterfactuals and study the resulting domain and model shifts. We find that the induced shifts are substantial enough to likely impede the applicability of Algorithmic Recourse in situations that involve competition for scarce resources. Fortunately, we find various potential mitigation strategies that can be used in combination with existing approaches. Our simulation framework for studying recourse dynamics is fast and open-sourced. 
-
@@ -6,8 +6,19 @@ filters:
   - lua/abstract-to-meta.lua
   - quarto
 bibliography: bib.bib
-execute:
-  freeze: auto  # re-render only when source changes
+
+crossref:
+  fig-prefix: Figure
+  tbl-prefix: Table
+fig-format: png
+
+execute: 
+  freeze: auto
+  eval: true
+  echo: true
+  output: false
+
+jupyter: julia-1.8
 
 
 
@@ -0,0 +1,24 @@
+`AlgorithmicRecourseDynamics.jl` is a Julia package for modeling Algorithmic Recourse Dynamics.
+
+## Research Paper 📝
+
+**Note** ⚠: You are browsing the [`#original-paper`](https://github.com/pat-alt/AlgorithmicRecourseDynamics.jl/tree/original-paper) branch of `AlgorithmicRecourseDynamics.jl`. This branch is a static artifact corresponding to the state of the package at the time the paper was first published. It can be used to replicate the original findings of the paper.
+
+## At a Glance
+
+You can find resources relevant to the paper right here on this branch of the
+repository:
+
+- [Paper](paper/paper.pdf)
+- [Notebooks](dev/notebooks/)
+- [Supplementary Appendix](build/dev/notebooks/appendix.html) generated from notebooks (download the HTML and view in browser)
+- [Artifacts]() (including data and experimental results; link currently excluded due to double-blind review process) 
+
+In this work we investigate what happens if Algorithmic Recourse is actually implemented by a large number of individuals. The chart below illustrates what we mean by Endogenous Macrodynamics in Algorithmic Recourse: (a) we have a simple linear classifier trained for binary classification where samples from the negative class (y=0) are marked in blue and samples of the positive class (y=1) are marked in orange; (b) the implementation of AR for a random subset of individuals leads to a noticeable domain shift; (c) as the classifier is retrained we observe a corresponding model shift; (d) as this process is repeated, the decision boundary moves away from the target class.
+
+![](paper/www/poc.png)
+
+## Paper Abstract
+
+Existing work on Counterfactual Explanations (CE) and Algorithmic Recourse (AR) has largely focused on single individuals in a static environment: given some estimated model, the goal is to find valid counterfactuals for an individual instance that fulfill various desiderata. The ability of such counterfactuals to handle dynamics like data and model drift remains a largely unexplored research challenge. There has also been surprisingly little work on the related question of how the actual implementation of recourse by one individual may affect other individuals. Through this work we aim to close that gap. We first show that many of the existing methodologies can be collectively described by a generalized framework. We then argue that the existing framework does not account for a hidden external cost of recourse, that only reveals itself when studying the endogenous dynamics of recourse at the group level. Through simulation experiments involving various state-of-the-art counterfactual generators and several benchmark datasets, we generate large numbers of counterfactuals and study the resulting domain and model shifts. We find that the induced shifts are substantial enough to likely impede the applicability of Algorithmic Recourse in some situations. Fortunately, we find various strategies to mitigate these concerns. Our simulation framework for studying recourse dynamics is fast and open-sourced. 
+
@@ -0,0 +1,4 @@
+format: 
+  commonmark:
+    variant: -raw_html
+    wrap: none
@@ -0,0 +1,9 @@
+```@meta
+CurrentModule = AlgorithmicRecourseDynamics
+```
+
+# AlgorithmicRecourseDynamics
+
+Documentation for [AlgorithmicRecourseDynamics.jl](https://github.com/pat-alt/AlgorithmicRecourseDynamics.jl).
+
+{{< include _intro.qmd >}}
@@ -0,0 +1,29 @@
+---
+title: Supplementary Appendix
+format:
+    html: 
+        self-contained: true
+        code-fold: true
+        execute:
+            echo: true
+            eval: false
+            warning: false
+        toc: true
+jupyter: julia-1.7
+---
+
+This is a supplementary appendix to the research paper **Endogenous Macrodynamics in Algorithmic Recourse**. It contains all of the experimental results, including those not highlighted in the actual paper. It also contains additional information about the proposed counterfactual generators.
+
+# Experimental Results {#sec-results}
+
+{{< include experiments/synthetic.qmd >}}
+
+{{< include experiments/real_world.qmd >}}
+
+{{< include experiments/mitigation_strategies.qmd >}}
+ 
+# Generators {#sec-generators}
+
+{{< include generators/gravitational_generator.qmd >}}
+
+{{< include generators/clap_roar_generator.qmd >}}
@@ -0,0 +1,132 @@
+---
+title: Preprocessing Real-World Data
+jupyter: julia-1.7
+---
+
+```{julia}
+# Activate the project environment found at the relative path "dev".
+using Pkg; Pkg.activate("dev")
+```
+
+```{julia}
+# Load shared helper code and the packages used by the chunks below.
+# NOTE(review): output_dir, www_dir and data_dir are not defined in this file;
+# presumably they come from dev/utils.jl or the package itself -- confirm.
+include("dev/utils.jl")
+using AlgorithmicRecourseDynamics
+using CounterfactualExplanations, Flux, Plots, PlotThemes, Random, LaplaceRedux, LinearAlgebra
+# Select the plotting theme for this session.
+theme(:wong)
+# Resolve the paths for the "real_world" artifacts through the helper functions.
+output_path = output_dir("real_world")
+www_path = www_dir("real_world")
+data_path = data_dir("real_world")
+```
+
+## California Housing Data
+
+Fetching the data using Python's `sklearn`:
+
+```{python}
+# Download the California housing data as a pandas frame plus target series.
+from sklearn.datasets import fetch_california_housing
+df, y = fetch_california_housing(return_X_y=True, as_frame=True)
+# Append the target as a regular column so features and target share one CSV.
+df["target"] = y.values
+# NOTE(review): hard-coded relative path; presumably it must resolve to the same
+# directory as the Julia-side `data_path` -- confirm against the working directory.
+data_path = "../../artifacts/upload/data/real_world"
+import os
+# Write the raw data without the pandas index column.
+df.to_csv(os.path.join(data_path,"raw/cal_housing.csv"), index=False)
+```
+
+Loading the data into Julia session:
+
+```{julia}
+# Load the raw California housing CSV, standardise the features and binarise the target.
+using CSV, DataFrames, Statistics, StatsBase
+# NOTE(review): presumably the file written by the Python chunk -- confirm both paths agree.
+df = CSV.read(joinpath(data_path, "raw/cal_housing.csv"), DataFrame)
+# Features:
+X = Matrix(df[:,Not(:target)])
+# Fit a z-score transform with dims=1 and apply it to X in place.
+dt = fit(ZScoreTransform, X, dims=1)
+StatsBase.transform!(dt, X)
+# Target:
+y = df.target
+# 1.0 where the target is at or above its median, 0.0 otherwise.
+y = Float64.(y .>= median(y)); # binary target
+# Data:
+# Rebuild the frame from the transformed matrix with auto-generated column names.
+df = DataFrame(X,:auto)
+df.target = y
+```
+
+```{julia}
+# Build a resampled copy of `df` via MLUtils' `undersample`, using `df.target` as the labels.
+# `using MLUtils` is needed in addition to the selective import: the qualified
+# method definitions below (`MLUtils.getobs`, `MLUtils.numobs`) and the bare
+# `getobs` call need the module name and its exports in scope, whereas
+# `using MLUtils: undersample` binds only `undersample`.
+using MLUtils
+using MLUtils: undersample
+# Make DataFrames.jl work
+# Observations are rows: indexing selects rows, the observation count is the row count.
+MLUtils.getobs(data::DataFrame, i) = data[i,:]
+MLUtils.numobs(data::DataFrame) = nrow(data)
+# Materialise the (shuffled) undersampled selection of rows.
+df_balanced = getobs(undersample(df, df.target;shuffle=true))
+```
+
+```{julia}
+# Persist the undersampled frame (`df_balanced`) rather than the full `df`, so the
+# resampling result is actually used and this dataset is written the same way as
+# the other real-world datasets in this document.
+CSV.write(joinpath(data_path, "cal_housing.csv"), df_balanced)
+```
+
+
+## Give Me Some Credit
+
+```{julia}
+# Load the raw "Give Me Some Credit" CSV, clean it and standardise the features.
+using CSV, DataFrames, Statistics, StatsBase
+df = CSV.read(joinpath(data_path, "raw/cs-training.csv"), DataFrame)
+# Drop :Column1 (presumably an unnamed row-index column in the raw file -- confirm).
+select!(df, Not([:Column1]))
+rename!(df, :SeriousDlqin2yrs => :target)
+# Turn literal "NA" entries into `missing`, then drop every row containing one.
+mapcols!(x -> [ifelse(x_=="NA", missing, x_) for x_ in x], df)
+dropmissing!(df)
+# Columns that are still strings are parsed to Int; other columns pass through unchanged.
+mapcols!(x -> eltype(x) <: AbstractString ? parse.(Int, x) : x, df)
+# Features:
+X = Matrix(df[:,Not(:target)])
+# Fit a z-score transform with dims=1 and apply it to X in place.
+dt = fit(ZScoreTransform, X, dims=1)
+StatsBase.transform!(dt, X)
+# Target:
+y = df.target
+# Data:
+# Rebuild the frame from the transformed matrix with auto-generated column names.
+df = DataFrame(X,:auto)
+df.target = y
+```
+
+```{julia}
+# Build a resampled copy of `df` via MLUtils' `undersample`, using `df.target` as the labels.
+using MLUtils
+using MLUtils: undersample
+# Make DataFrames.jl work
+# Observations are rows: indexing selects rows, the observation count is the row count.
+MLUtils.getobs(data::DataFrame, i) = data[i,:]
+MLUtils.numobs(data::DataFrame) = nrow(data)
+df_balanced = getobs(undersample(df, df.target;shuffle=true))
+```
+
+```{julia}
+# Write the undersampled frame to the data directory.
+CSV.write(joinpath(data_path, "gmsc.csv"), df_balanced)
+```
+
+## UCI Credit Card Default
+
+```{julia}
+# Load the raw UCI credit-card default CSV, clean it and standardise the features.
+using CSV, DataFrames, Statistics, StatsBase
+df = CSV.read(joinpath(data_path, "raw/UCI_Credit_Card.csv"), DataFrame)
+# Remove the identifier and the :SEX, :EDUCATION and :MARRIAGE columns.
+select!(df, Not([:ID, :SEX, :EDUCATION, :MARRIAGE]))
+rename!(df, "default.payment.next.month" => :target)
+dropmissing!(df)
+# Columns that are strings are parsed to Int; other columns pass through unchanged.
+mapcols!(x -> eltype(x) <: AbstractString ? parse.(Int, x) : x, df)
+# Features:
+X = Matrix(df[:,Not(:target)])
+# Fit a z-score transform with dims=1 and apply it to X in place.
+dt = fit(ZScoreTransform, X, dims=1)
+StatsBase.transform!(dt, X)
+# Target:
+y = df.target
+# Data:
+# Rebuild the frame from the transformed matrix with auto-generated column names.
+df = DataFrame(X,:auto)
+df.target = y
+```
+
+```{julia}
+# Build a resampled copy of `df` via MLUtils' `undersample`, using `df.target` as the labels.
+using MLUtils
+using MLUtils: undersample
+# Make DataFrames.jl work
+# Observations are rows: indexing selects rows, the observation count is the row count.
+MLUtils.getobs(data::DataFrame, i) = data[i,:]
+MLUtils.numobs(data::DataFrame) = nrow(data)
+df_balanced = getobs(undersample(df, df.target;shuffle=true))
+```
+
+```{julia}
+# Write the undersampled frame to the data directory.
+CSV.write(joinpath(data_path, "credit_default.csv"), df_balanced)
+```
+
+
+
+
@@ -0,0 +1,50 @@
+---
+title: Generating Synthetic Data
+jupyter: julia-1.7
+---
+
+```{julia}
+# Activate the project environment found at the relative path "dev".
+using Pkg; Pkg.activate("dev")
+```
+
+```{julia}
+# Load shared helper code and resolve the paths for the "synthetic" artifacts.
+# NOTE(review): output_dir, www_dir and data_dir are not defined in this file;
+# presumably they come from dev/utils.jl or the package itself -- confirm.
+include("dev/utils.jl")
+using AlgorithmicRecourseDynamics
+output_path = output_dir("synthetic")
+www_path = www_dir("synthetic")
+data_path = data_dir("synthetic")
+```
+
+
+```{julia}
+# Generate four synthetic two-feature datasets and write each one to CSV under `data_path`.
+using MLJ, DataFrames, CSV
+# n is the sample count passed to every generator; p is the feature count passed to make_blobs.
+n = 1000
+p = 2
+
+# Seed the global RNG before any data is generated.
+using Random
+Random.seed!(42)
+
+# Linearly separable:
+X, y = make_blobs(n, p; centers=2, center_box=(-2 => 2), cluster_std=0.1)
+df = DataFrame(X)
+# Recode the labels: label 1 becomes 0, every other label becomes 1.
+df.target .= ifelse.(y.==1,0,1)
+CSV.write(joinpath(data_path, "linearly_separable.csv"),df)
+
+# Overlapping:
+# Same call as above except for the larger cluster_std (0.5 instead of 0.1).
+X, y = make_blobs(n, p; centers=2, center_box=(-2 => 2), cluster_std=0.5)
+df = DataFrame(X)
+# Recode the labels: label 1 becomes 0, every other label becomes 1.
+df.target .= ifelse.(y.==1,0,1)
+CSV.write(joinpath(data_path, "overlapping.csv"),df)
+
+# Circles:
+X, y = make_circles(n; noise=0.15, factor=0.01)
+df = DataFrame(X)
+# Labels are stored exactly as returned by the generator (no recoding here).
+df.target = y
+CSV.write(joinpath(data_path, "circles.csv"),df)
+
+# Moon:
+X, y = make_moons(n)
+df = DataFrame(X)
+# Labels are stored exactly as returned by the generator (no recoding here).
+df.target = y
+CSV.write(joinpath(data_path, "moons.csv"),df)
+```