Skip to content

Commit 22306ef

Browse files
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
1 parent 851bd8d commit 22306ef

1 file changed

Lines changed: 39 additions & 35 deletions

File tree

machine_learning/dimensionality_reduction.py

Lines changed: 39 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -193,36 +193,36 @@ def locally_linear_embedding(
193193

194194
# Remove the first index (point itself)
195195
indices = indices[:, 1:]
196-
196+
197197
# Create weight matrix W
198198
W = np.zeros((n_samples, n_samples))
199-
199+
200200
for i in range(n_samples):
201201
# Get neighbors (excluding the point itself)
202202
neighbors = indices[i]
203203
# Center the neighbors
204204
Z = X[neighbors] - X[i]
205205
# Local covariance matrix - ensure float64
206206
C = np.dot(Z, Z.T).astype(np.float64)
207-
207+
208208
# Regularization
209209
trace = np.trace(C)
210210
if trace > 0:
211211
reg_value = reg * trace
212212
else:
213213
reg_value = reg
214-
214+
215215
# Ensure we're working with floats for the diagonal update
216216
C = C.astype(np.float64)
217217
np.fill_diagonal(C, C.diagonal() + reg_value)
218-
218+
219219
# Solve for weights
220220
try:
221221
w = np.linalg.solve(C, np.ones(n_neighbors))
222222
except np.linalg.LinAlgError:
223223
# If singular, use pseudoinverse
224224
w = np.linalg.pinv(C).dot(np.ones(n_neighbors))
225-
225+
226226
# Normalize weights
227227
w /= np.sum(w)
228228
W[i, neighbors] = w
@@ -233,11 +233,11 @@ def locally_linear_embedding(
233233

234234
# Compute eigenvectors - use all and then select
235235
eigenvalues, eigenvectors = eigh(M)
236-
236+
237237
# Sort eigenvalues and take the ones after the first (skip the zero eigenvalue)
238-
idx = np.argsort(eigenvalues)[1:dimensions+1] # Skip first (zero) eigenvalue
238+
idx = np.argsort(eigenvalues)[1 : dimensions + 1] # Skip first (zero) eigenvalue
239239
embedding = eigenvectors[:, idx].T
240-
240+
241241
logging.info("Locally Linear Embedding computed")
242242
return embedding
243243

@@ -267,40 +267,38 @@ def multidimensional_scaling(
267267
if metric:
268268
# Classical MDS
269269
# Compute distance matrix
270-
D = cdist(X, X, metric='euclidean')
271-
D_squared = D ** 2
270+
D = cdist(X, X, metric="euclidean")
271+
D_squared = D**2
272272

273273
# Double centering
274274
H = np.eye(n_samples) - np.ones((n_samples, n_samples)) / n_samples
275275
B = -0.5 * H.dot(D_squared).dot(H)
276276

277277
# Eigen decomposition - get all eigenvectors and select top ones
278278
eigenvalues, eigenvectors = eigh(B)
279-
279+
280280
# Sort in descending order and take top dimensions
281281
idx = np.argsort(eigenvalues)[::-1][:dimensions]
282282
eigenvalues = eigenvalues[idx]
283283
eigenvectors = eigenvectors[:, idx]
284-
284+
285285
# Embedding
286286
embedding = eigenvectors * np.sqrt(eigenvalues)
287-
287+
288288
else:
289-
290289
# Initialize random configuration
291290
rng = np.random.RandomState(42)
292291
embedding = rng.randn(n_samples, dimensions)
293-
292+
294293
# Simple gradient descent (very basic implementation)
295-
D_original = cdist(X, X, metric='euclidean')
296-
294+
D_original = cdist(X, X, metric="euclidean")
295+
297296
for iteration in range(100):
298-
D_embedded = cdist(embedding, embedding, metric='euclidean')
299-
297+
D_embedded = cdist(embedding, embedding, metric="euclidean")
298+
300299
# Stress (loss function)
301300
stress = np.sum((D_original - D_embedded) ** 2)
302-
303-
301+
304302
# Simple gradient update
305303
grad = np.zeros_like(embedding)
306304
for i in range(n_samples):
@@ -309,8 +307,12 @@ def multidimensional_scaling(
309307
diff = embedding[i] - embedding[j]
310308
dist = np.linalg.norm(diff)
311309
if dist > 1e-10:
312-
grad[i] += 2 * (D_embedded[i, j] - D_original[i, j]) * (diff / dist)
313-
310+
grad[i] += (
311+
2
312+
* (D_embedded[i, j] - D_original[i, j])
313+
* (diff / dist)
314+
)
315+
314316
embedding -= 0.01 * grad / n_samples
315317

316318
logging.info("Multidimensional Scaling computed")
@@ -320,11 +322,11 @@ def multidimensional_scaling(
320322
def test_locally_linear_embedding() -> None:
321323
"""Test function for Locally Linear Embedding"""
322324
# Use float data to avoid dtype issues
323-
features = np.array([[1.0, 2.0, 3.0, 4.0],
324-
[2.0, 3.0, 4.0, 5.0],
325-
[3.0, 4.0, 5.0, 6.0]])
325+
features = np.array(
326+
[[1.0, 2.0, 3.0, 4.0], [2.0, 3.0, 4.0, 5.0], [3.0, 4.0, 5.0, 6.0]]
327+
)
326328
dimensions = 2
327-
329+
328330
try:
329331
embedding = locally_linear_embedding(features, dimensions, n_neighbors=2)
330332
assert embedding.shape[0] == dimensions
@@ -336,22 +338,24 @@ def test_locally_linear_embedding() -> None:
336338

337339
def test_multidimensional_scaling() -> None:
338340
"""Test function for Multidimensional Scaling"""
339-
features = np.array([[1.0, 2.0, 3.0, 4.0],
340-
[2.0, 3.0, 4.0, 5.0],
341-
[3.0, 4.0, 5.0, 6.0]])
341+
features = np.array(
342+
[[1.0, 2.0, 3.0, 4.0], [2.0, 3.0, 4.0, 5.0], [3.0, 4.0, 5.0, 6.0]]
343+
)
342344
dimensions = 2
343-
345+
344346
try:
345347
# Test metric MDS
346348
embedding_metric = multidimensional_scaling(features, dimensions, metric=True)
347349
assert embedding_metric.shape[0] == dimensions
348350
assert embedding_metric.shape[1] == features.shape[1]
349-
351+
350352
# Test non-metric MDS
351-
embedding_nonmetric = multidimensional_scaling(features, dimensions, metric=False)
353+
embedding_nonmetric = multidimensional_scaling(
354+
features, dimensions, metric=False
355+
)
352356
assert embedding_nonmetric.shape[0] == dimensions
353357
assert embedding_nonmetric.shape[1] == features.shape[1]
354-
358+
355359
except Exception as e:
356360
logging.error(f"MDS test failed: {e}")
357361
raise

0 commit comments

Comments (0)