Skip to content

Commit 4cefb5a

Browse files
committed
Matrix is the primary, DataFrame is the secondary data type in robcov
1 parent 808c0cf commit 4cefb5a

1 file changed

Lines changed: 15 additions & 13 deletions

File tree

src/mve.jl

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ import ..Basis:
1212
import ..Diagnostics: mahalanobisSquaredMatrix
1313

1414

15-
function enlargesubset(initialsubset, data::DataFrame, dataMatrix::AbstractMatrix, h::Int)
16-
n, p = size(dataMatrix)
15+
function enlargesubset(initialsubset, data::AbstractMatrix, h::Int)
16+
n, p = size(data)
1717

1818
basicsubset = copy(initialsubset)
1919

@@ -25,7 +25,7 @@ function enlargesubset(initialsubset, data::DataFrame, dataMatrix::AbstractMatri
2525

2626
while length(basicsubset) < h
2727
meanvector .= applyColumns(mean, data[basicsubset, :])
28-
covmatrix .= cov(dataMatrix[basicsubset, :])
28+
covmatrix .= cov(data[basicsubset, :])
2929
md2mat .=
3030
mahalanobisSquaredMatrix(data, meanvector = meanvector, covmatrix = covmatrix)
3131
md2 .= diag(md2mat)
@@ -36,9 +36,9 @@ function enlargesubset(initialsubset, data::DataFrame, dataMatrix::AbstractMatri
3636
end
3737

3838

39-
function robcov(data::DataFrame; alpha = 0.01, estimator = :mve)
40-
dataMatrix = Matrix(data)
41-
n, p = size(dataMatrix)
39+
function robcov(data::Matrix; alpha = 0.01, estimator = :mve)
40+
41+
n, p = size(data)
4242
chisquared = Chisq(p)
4343
chisqcrit = quantile(chisquared, 1.0 - alpha)
4444
c = sqrt(chisqcrit)
@@ -65,8 +65,8 @@ function robcov(data::DataFrame; alpha = 0.01, estimator = :mve)
6565
goal = Inf
6666
try
6767
initialsubset .= sample(indices, k, replace = false)
68-
hsubset .= enlargesubset(initialsubset, data, dataMatrix, h)
69-
covmatrix .= cov(dataMatrix[hsubset, :])
68+
hsubset .= enlargesubset(initialsubset, data, h)
69+
covmatrix .= cov(data[hsubset, :])
7070
if estimator == :mve
7171
meanvector .= applyColumns(mean, data[hsubset, :])
7272
md2mat .= mahalanobisSquaredMatrix(
@@ -88,8 +88,10 @@ function robcov(data::DataFrame; alpha = 0.01, estimator = :mve)
8888
besthsubset .= hsubset
8989
end
9090
end
91+
92+
9193
meanvector .= applyColumns(mean, data[besthsubset, :])
92-
covmatrix .= cov(dataMatrix[besthsubset, :])
94+
covmatrix .= cov(data[besthsubset, :])
9395
md2 .= diag(
9496
mahalanobisSquaredMatrix(
9597
data,
@@ -143,11 +145,11 @@ Van Aelst, Stefan, and Peter Rousseeuw. "Minimum volume ellipsoid." Wiley
143145
Interdisciplinary Reviews: Computational Statistics 1.1 (2009): 71-82.
144146
"""
145147
function mve(data::DataFrame; alpha = 0.01)
    # `robcov` accepts a `Matrix` as its data argument, so the DataFrame is
    # converted once here and the computation is delegated with the MVE
    # estimator selected.
    return robcov(Matrix(data), alpha = alpha, estimator = :mve)
end
148150

149151
# Matrix entry point for `mve`: delegates to `robcov` with the MVE estimator.
function mve(data::AbstractMatrix{Float64}; alpha = 0.01)
    # Forwarding to `mve(data, ...)` would dispatch right back to this method
    # (the argument is still an AbstractMatrix{Float64}) and recurse until the
    # stack overflows, so the matrix-based `robcov` is called directly.
    # `Matrix(data)` yields the concrete `Matrix` that `robcov`'s signature
    # requires, whatever AbstractMatrix subtype was passed in.
    return robcov(Matrix(data), alpha = alpha, estimator = :mve)
end
152154

153155

@@ -187,11 +189,11 @@ Rousseeuw, Peter J., and Katrien Van Driessen. "A fast algorithm for the minimum
187189
determinant estimator." Technometrics 41.3 (1999): 212-223.
188190
"""
189191
function mcd(data::DataFrame; alpha = 0.01)
    # `robcov` accepts a `Matrix` as its data argument, so the DataFrame is
    # converted once here and the computation is delegated with the MCD
    # estimator selected.
    return robcov(Matrix(data), alpha = alpha, estimator = :mcd)
end
192194

193195
# Matrix entry point for `mcd`: delegates to `robcov` with the MCD estimator.
function mcd(data::AbstractMatrix{Float64}; alpha = 0.01)
    # Forwarding to `mcd(data, ...)` would dispatch right back to this method
    # (the argument is still an AbstractMatrix{Float64}) and recurse until the
    # stack overflows, so the matrix-based `robcov` is called directly.
    # `Matrix(data)` yields the concrete `Matrix` that `robcov`'s signature
    # requires, whatever AbstractMatrix subtype was passed in.
    return robcov(Matrix(data), alpha = alpha, estimator = :mcd)
end
196198

197199

0 commit comments

Comments
 (0)