From 8fc5e93f13a3b6f0fec35e67e0c1ea5cb3beace4 Mon Sep 17 00:00:00 2001 From: Tony Kelman Date: Mon, 3 Apr 2017 22:19:39 -0400 Subject: [PATCH] Refactor Julia ktruss implementation Use CHOLMOD.Sparse for sparse matmul, to avoid the cost of sorting the row indices when that isn't necessary Devectorize the various sums over columns, instead iterate over each nonzero in the sparsity structure directly Results in about a 3x speedup on amazon0302 --- .../ktruss/code/julia/ktruss.jl | 85 +++++++++++++------ 1 file changed, 61 insertions(+), 24 deletions(-) diff --git a/SubgraphIsomorphism/ktruss/code/julia/ktruss.jl b/SubgraphIsomorphism/ktruss/code/julia/ktruss.jl index 9bfff12..3195a63 100644 --- a/SubgraphIsomorphism/ktruss/code/julia/ktruss.jl +++ b/SubgraphIsomorphism/ktruss/code/julia/ktruss.jl @@ -1,34 +1,71 @@ +const Chol = if isdefined(Base, :SparseArrays) + Base.SparseArrays.CHOLMOD +else + Base.SparseMatrix.CHOLMOD +end + +colptr0(A::Chol.Sparse, col) = unsafe_load(unsafe_load(A.p).p, col) +rowval(A::Chol.Sparse, j) = unsafe_load(unsafe_load(A.p).i, j) + 1 +nzval(A::Chol.Sparse, j) = unsafe_load(unsafe_load(A.p).x, j) + function calcx(E, m, n, k) - tmp = E.'*E; - R = E * ( tmp - spdiagm( diag(tmp) ) ); - r,c,v = findnz(R); - id = v.==2; - A = sparse( r[id], c[id], 1, m, n); - s = sum(A, 2); - x = s .< (k-2); - return(x, !x); + CSE = Chol.Sparse(E) + tmp = CSE'*CSE + + # subtract spdiagm( diag(tmp) ) from tmp in-place by setting diagonals to 0 + tmp_ptr = unsafe_load(tmp.p) + for col in 1:n + for j in colptr0(tmp, col) + 1 : colptr0(tmp, col+1) + if rowval(tmp, j) == col + unsafe_store!(tmp_ptr.x, 0, j) + end + end + end + + R = CSE * tmp + s = zeros(Int, m) + + @inbounds for col in 1:n + for j in colptr0(R, col) + 1 : colptr0(R, col+1) + if nzval(R, j) == 2 + s[rowval(R, j)] += 1 + end + end + end + + rowflags = falses(m) # rows of E with at least one nonzero in them + @inbounds for col in 1:n + for j in colptr0(CSE, col) + 1 : colptr0(CSE, col+1) + if nzval(CSE, j) != 0 + rowflags[rowval(CSE, j)] = true + end + end + end + + return find(s .>= (k-2)), sum(rowflags) end function ktruss(inc_mtx_file, k) - if ~isfile( inc_mtx_file ) - println("unable to open input file"); - return (-1); + if !isfile( inc_mtx_file ) + println("unable to open input file") + return (-1) end # load input data - t_read_inc=@elapsed ii = readdlm( inc_mtx_file, '\t', Int64); - println("incidence matrix read time : ", t_read_inc); - - t_create_inc=@elapsed E = sparse( ii[:,1], ii[:,2], ii[:,3] ); - println("sparse adj. matrix creation time : ", t_create_inc); - - # - tic(); - m,n = size(E); - x, xc = calcx(E, m, n, k); - while sum(xc) != sum( any(E,2) ) - E[find(x), :] = 0 - x, xc = calcx(E, m, n, k); + t_read_inc=@elapsed ii = readdlm( inc_mtx_file, '\t', Int64) + println("incidence matrix read time : ", t_read_inc) + + t_create_inc=@elapsed E = sparse( ii[:,1], ii[:,2], ii[:,3] ) + println("sparse adj. matrix creation time : ", t_create_inc) + + tic() + m,n = size(E) + xcinds, rowcount = calcx(E, m, n, k) + while length(xcinds) != rowcount + # keep rows of E where x is false, equivalent to E[find(x), :] = 0 + Ekeep = E[xcinds, :] + E = SparseMatrixCSC(m, n, Ekeep.colptr, xcinds[Ekeep.rowval], Ekeep.nzval) + xcinds, rowcount = calcx(E, m, n, k) end return E