BaseModelAI
diff --git a/‎dom_datasets.py‎
Lines changed: 43 additions & 0 deletions b/‎dom_datasets.py‎
Lines changed: 43 additions & 0 deletions
diff --git a/‎dom_fast.py‎
Lines changed: 55 additions & 0 deletions b/‎dom_fast.py‎
Lines changed: 55 additions & 0 deletions
diff --git a/‎dom_fb_competitors.py‎
Lines changed: 37 additions & 0 deletions b/‎dom_fb_competitors.py‎
Lines changed: 37 additions & 0 deletions
diff --git a/‎dom_scale.py‎
Lines changed: 31 additions & 0 deletions b/‎dom_scale.py‎
Lines changed: 31 additions & 0 deletions
diff --git a/‎dom_verify.py‎
Lines changed: 48 additions & 0 deletions b/‎dom_verify.py‎
Lines changed: 48 additions & 0 deletions
@@ -0,0 +1,43 @@
+import numpy as np, time, tracemalloc, sys, gc
+from pycleora import SparseMatrix, embed
+from pycleora.algorithms import embed_prone, embed_randne
+from pycleora.classify import mlp_classify
+from pycleora.metrics import node_classification_scores
+from pycleora.community import detect_communities_louvain
+from pycleora.datasets import load_dataset
+# Passed as the second positional argument to every embed*() call in this
+# script; presumably the embedding dimensionality (TODO confirm in pycleora).
+DIM = 256
+
+def measure(fn, g):
+    """Run ``fn(g)`` once and report its result, wall time and peak traced memory.
+
+    Args:
+        fn: Callable invoked with ``g`` as its only argument.
+        g: Object forwarded to ``fn`` unchanged.
+
+    Returns:
+        A ``(result, seconds, peak_mib)`` tuple: the value returned by ``fn``,
+        the elapsed seconds, and the tracemalloc peak divided by 1024 twice
+        (bytes -> MiB).
+
+    Raises:
+        Whatever ``fn`` raises; tracing is stopped before it propagates.
+
+    NOTE(review): the timing is taken while tracemalloc is active, so it
+    includes tracing overhead. tracemalloc only sees allocations made through
+    Python's allocators; memory allocated natively by an extension may not be
+    counted -- confirm before comparing memory figures across libraries.
+    """
+    gc.collect()
+    tracemalloc.start()
+    try:
+        # perf_counter is monotonic, so the interval cannot be skewed by
+        # system clock adjustments the way time.time() can.
+        t0 = time.perf_counter()
+        r = fn(g)
+        t = time.perf_counter() - t0
+        _, peak = tracemalloc.get_traced_memory()
+    finally:
+        # Always stop tracing, even when fn raises: a session left running
+        # would let its peak leak into the next measurement.
+        tracemalloc.stop()
+    return r, t, peak / 1024 / 1024
+
+def run(name, fn, g, lb, mlp_h=64, mlp_e=100):
+    """Embed ``g`` with ``fn``, score the embedding and print one result row.
+
+    The row holds, in order: ``name``, the ``accuracy`` and ``macro_f1`` from
+    ``node_classification_scores``, the ``accuracy`` and ``macro_f1`` from
+    ``mlp_classify``, the elapsed seconds and the peak memory reported by
+    ``measure``.
+
+    Args:
+        name: Row label, left-aligned in an 18-character column.
+        fn: Embedding callable handed to ``measure``.
+        g: Graph passed to ``fn`` and to both scorers.
+        lb: Labels passed to both scorers.
+        mlp_h: Forwarded to ``mlp_classify`` as ``hidden_dim``.
+        mlp_e: Forwarded to ``mlp_classify`` as ``num_epochs``.
+
+    The MLP evaluation is best-effort: if it fails, the row is still printed
+    with ``-1`` in both MLP columns and the reason is written to stderr.
+    """
+    emb, t, mem = measure(fn, g)
+    nc = node_classification_scores(g, emb, lb, seed=42)
+    try:
+        mlp = mlp_classify(g, emb, lb, hidden_dim=mlp_h, num_epochs=mlp_e, learning_rate=0.01, seed=42)
+        mlp_acc = mlp['accuracy']
+        mlp_f1 = mlp['macro_f1']
+    except Exception as exc:
+        # Catch Exception rather than everything so Ctrl-C / SystemExit still
+        # abort a long benchmark, and say why the MLP columns show -1.
+        print(f"{name}: mlp_classify failed: {exc!r}", file=sys.stderr)
+        mlp_acc = -1
+        mlp_f1 = -1
+    print(f"{name:<18s} {nc['accuracy']:>8.4f} {nc['macro_f1']:>8.4f} {mlp_acc:>8.4f} {mlp_f1:>8.4f} {t:>7.3f}s {mem:>7.1f}")
+    # Flush so rows appear as soon as they are computed, then release the
+    # embedding before the next algorithm runs.
+    sys.stdout.flush()
+    gc.collect()
+
+# Embedding configurations compared on every dataset. Each entry is a row
+# label plus a callable that takes the graph and returns its embedding.
+algos = (
+    ("Cleora(w,8it)", lambda graph: embed(graph, DIM, 8, whiten=True)),
+    ("Cleora(base)", lambda graph: embed(graph, DIM, 4)),
+    ("ProNE", lambda graph: embed_prone(graph, DIM)),
+    ("RandNE", lambda graph: embed_randne(graph, DIM)),
+)
+
+for ds_name in ("ppi_large", "flickr", "ogbn_arxiv"):
+    print(f"\n=== {ds_name.upper()} ===")
+    ds = load_dataset(ds_name)
+    g = SparseMatrix.from_iterator(iter(ds["edges"]), ds["columns"])
+    # The labels used for scoring are the Louvain communities detected on the
+    # graph itself, not labels shipped with the dataset.
+    lb = detect_communities_louvain(g)
+    print(f"Nodes: {ds['num_nodes']}, Labels: {len(lb)}")
+    for name, fn in algos:
+        run(name, fn, g, lb)
+
+print("\nDONE!")
@@ -0,0 +1,55 @@
+import numpy as np, time, tracemalloc, sys, gc
+from pycleora import SparseMatrix, embed
+from pycleora.algorithms import embed_prone, embed_randne
+from pycleora.classify import mlp_classify
+from pycleora.metrics import node_classification_scores
+from pycleora.community import detect_communities_louvain
+from pycleora.datasets import load_dataset
+# Passed as the second positional argument to every embed*() call in this
+# script; presumably the embedding dimensionality (TODO confirm in pycleora).
+DIM = 256
+
+def measure(fn, g):
+    """Call ``fn(g)`` once, returning its result, elapsed time and peak memory.
+
+    Args:
+        fn: Callable invoked with ``g`` as its only argument.
+        g: Object forwarded to ``fn`` unchanged.
+
+    Returns:
+        ``(result, seconds, peak_mib)`` where ``peak_mib`` is the tracemalloc
+        peak in bytes divided by 1024 twice.
+
+    Raises:
+        Whatever ``fn`` raises; tracing is stopped before it propagates.
+
+    NOTE(review): elapsed time includes tracemalloc's own overhead, and
+    allocations made outside Python's allocators may not be traced.
+    """
+    gc.collect()
+    tracemalloc.start()
+    try:
+        # Monotonic clock: immune to wall-clock adjustments during the run.
+        t0 = time.perf_counter()
+        r = fn(g)
+        t = time.perf_counter() - t0
+        _, peak = tracemalloc.get_traced_memory()
+    finally:
+        # Stop tracing on every path so a failed run cannot leave a session
+        # open and contaminate the peak of the next measurement.
+        tracemalloc.stop()
+    return r, t, peak / 1024 / 1024
+
+def run(name, fn, g, lb):
+    """Benchmark one embedding callable and print a single result row.
+
+    The embedding produced by ``fn`` (timed via ``measure``) is scored with
+    ``node_classification_scores`` and ``mlp_classify``; the row shows the
+    label, both accuracy / macro-F1 pairs, the elapsed seconds and the peak
+    memory figure.
+
+    Args:
+        name: Row label, left-aligned in an 18-character column.
+        fn: Embedding callable handed to ``measure``.
+        g: Graph passed to ``fn`` and to both scorers.
+        lb: Labels passed to both scorers.
+    """
+    emb, t, mem = measure(fn, g)
+    nc = node_classification_scores(g, emb, lb, seed=42)
+    mlp = mlp_classify(
+        g,
+        emb,
+        lb,
+        hidden_dim=128,
+        num_epochs=200,
+        learning_rate=0.01,
+        seed=42,
+    )
+    row = f"{name:<18s} {nc['accuracy']:>8.4f} {nc['macro_f1']:>8.4f} {mlp['accuracy']:>8.4f} {mlp['macro_f1']:>8.4f} {t:>7.3f}s {mem:>7.1f}"
+    print(row)
+    # Push the row out immediately, then collect before the next algorithm.
+    sys.stdout.flush()
+    gc.collect()
+
+# Facebook: only the two remaining baselines are run on this dataset.
+baselines = [
+    ("ProNE", lambda graph: embed_prone(graph, DIM)),
+    ("RandNE", lambda graph: embed_randne(graph, DIM)),
+]
+print("=== FACEBOOK (ProNE, RandNE) ===")
+ds = load_dataset("facebook")
+g = SparseMatrix.from_iterator(iter(ds["edges"]), ds["columns"])
+lb = detect_communities_louvain(g)
+for name, fn in baselines:
+    run(name, fn, g, lb)
+
+# PPI-large and Flickr: both Cleora variants followed by the same baselines,
+# each dataset preceded by its banner, size line and column header.
+full_suite = [
+    ("Cleora(w,8it)", lambda graph: embed(graph, DIM, 8, whiten=True)),
+    ("Cleora(base)", lambda graph: embed(graph, DIM, 4)),
+] + baselines
+for banner, ds_name in (
+    ("\n=== PPI-LARGE ===", "ppi_large"),
+    ("\n=== FLICKR ===", "flickr"),
+):
+    print(banner)
+    ds = load_dataset(ds_name)
+    g = SparseMatrix.from_iterator(iter(ds["edges"]), ds["columns"])
+    lb = detect_communities_louvain(g)
+    print(f"Nodes: {ds['num_nodes']}, Labels: {len(lb)}")
+    print(f"{'Algo':<18s} {'NC_Acc':>8s} {'NC_F1':>8s} {'MLP_Acc':>8s} {'MLP_F1':>8s} {'Time':>8s} {'Mem':>8s}")
+    for name, fn in full_suite:
+        run(name, fn, g, lb)
+
+print("\nDONE!")
@@ -0,0 +1,37 @@
+import numpy as np, time, tracemalloc, sys, gc
+from pycleora import SparseMatrix, embed
+from pycleora.algorithms import embed_prone, embed_randne, embed_netmf, embed_deepwalk
+from pycleora.classify import mlp_classify
+from pycleora.metrics import node_classification_scores
+from pycleora.community import detect_communities_louvain
+from pycleora.datasets import load_dataset
+
+# Passed as the second positional argument to every embed_*() call in this
+# script; presumably the embedding dimensionality (TODO confirm in pycleora).
+DIM = 256
+
+def measure(fn, g):
+    """Run ``fn(g)`` once and report its result, wall time and peak traced memory.
+
+    Args:
+        fn: Callable invoked with ``g`` as its only argument.
+        g: Object forwarded to ``fn`` unchanged.
+
+    Returns:
+        ``(result, seconds, peak_mib)`` where ``peak_mib`` is the tracemalloc
+        peak in bytes divided by 1024 twice.
+
+    Raises:
+        Whatever ``fn`` raises; tracing is stopped before it propagates.
+
+    NOTE(review): elapsed time includes tracemalloc's own overhead, and
+    allocations made outside Python's allocators may not be traced.
+    """
+    gc.collect()
+    tracemalloc.start()
+    try:
+        # Monotonic clock: immune to wall-clock adjustments during the run.
+        t0 = time.perf_counter()
+        r = fn(g)
+        t = time.perf_counter() - t0
+        _, peak = tracemalloc.get_traced_memory()
+    finally:
+        # The caller in this script catches failures and moves on to the next
+        # algorithm, so tracing must be stopped here or the failed run's peak
+        # would be reported for the following row.
+        tracemalloc.stop()
+    return r, t, peak / 1024 / 1024
+
+# Build the Facebook graph once; Louvain communities on it serve as labels.
+ds = load_dataset("facebook")
+g = SparseMatrix.from_iterator(iter(ds["edges"]), ds["columns"])
+lb = detect_communities_louvain(g)
+
+# Competitor methods to benchmark: (row label, callable taking the graph).
+algos = [
+    ("NetMF", lambda graph: embed_netmf(graph, DIM)),
+    (
+        "DeepWalk",
+        lambda graph: embed_deepwalk(graph, DIM, num_walks=10, walk_length=20),
+    ),
+    ("ProNE", lambda graph: embed_prone(graph, DIM)),
+    ("RandNE", lambda graph: embed_randne(graph, DIM)),
+]
+
+print(f"{'Algo':<14s} {'NC_Acc':>8s} {'NC_F1':>8s} {'MLP_Acc':>8s} {'MLP_F1':>8s} {'Time':>8s} {'Mem':>8s}")
+for name, fn in algos:
+    # A failure in one method (embedding, scoring or formatting) is reported
+    # as a truncated ERROR row so the remaining methods still run.
+    try:
+        emb, t, mem = measure(fn, g)
+        nc = node_classification_scores(g, emb, lb, seed=42)
+        mlp = mlp_classify(
+            g,
+            emb,
+            lb,
+            hidden_dim=128,
+            num_epochs=200,
+            learning_rate=0.01,
+            seed=42,
+        )
+        print(f"{name:<14s} {nc['accuracy']:>8.4f} {nc['macro_f1']:>8.4f} {mlp['accuracy']:>8.4f} {mlp['macro_f1']:>8.4f} {t:>7.3f}s {mem:>7.1f}")
+    except Exception as e:
+        print(f"{name:<14s} ERROR: {str(e)[:60]}")
+    sys.stdout.flush()
+    gc.collect()
@@ -0,0 +1,31 @@
+import numpy as np, time, tracemalloc, gc
+from pycleora import SparseMatrix, embed
+from pycleora.datasets import load_dataset
+# Passed as the second positional argument to every embed() call in this
+# script; presumably the embedding dimensionality (TODO confirm in pycleora).
+DIM = 256
+
+def m(fn, g):
+    """Run ``fn(g)`` once and report its result, wall time and peak traced memory.
+
+    Args:
+        fn: Callable invoked with ``g`` as its only argument.
+        g: Object forwarded to ``fn`` unchanged.
+
+    Returns:
+        ``(result, seconds, peak_mib)`` where ``peak_mib`` is the tracemalloc
+        peak in bytes divided by 1024 twice.
+
+    Raises:
+        Whatever ``fn`` raises; tracing is stopped before it propagates.
+
+    NOTE(review): elapsed time includes tracemalloc's own overhead, and
+    allocations made outside Python's allocators may not be traced.
+    """
+    gc.collect()
+    tracemalloc.start()
+    try:
+        # Monotonic clock: immune to wall-clock adjustments during the run.
+        t0 = time.perf_counter()
+        r = fn(g)
+        t = time.perf_counter() - t0
+        _, p = tracemalloc.get_traced_memory()
+    finally:
+        # Stop tracing on every path so a failed run cannot leave a session
+        # open and contaminate the peak of a later measurement.
+        tracemalloc.stop()
+    return r, t, p / 1024 / 1024
+
+# Time embed() at 4, 8 and 16 iterations on the yelp and roadnet graphs.
+for ds_name in ("yelp", "roadnet"):
+    ds = load_dataset(ds_name)
+    edges = iter(ds["edges"])
+    g = SparseMatrix.from_iterator(edges, ds["columns"])
+    print(f"{ds_name}: nodes={ds['num_nodes']}, edges={ds['num_edges']}")
+
+    # Baseline: 4 iterations.
+    emb, t, mem = m(lambda graph: embed(graph, DIM, 4), g)
+    print(f"  4 iter: t={t:.3f}s, mem={mem:.1f}MB")
+    # Drop the embedding before the next run so it is not held alongside it.
+    del emb
+    gc.collect()
+
+    # Double the iteration count: 8.
+    emb, t, mem = m(lambda graph: embed(graph, DIM, 8), g)
+    print(f"  8 iter: t={t:.3f}s, mem={mem:.1f}MB")
+    del emb
+    gc.collect()
+
+    # Double again: 16.
+    emb, t, mem = m(lambda graph: embed(graph, DIM, 16), g)
+    print(f"  16 iter: t={t:.3f}s, mem={mem:.1f}MB")
+    del emb
+    gc.collect()
@@ -0,0 +1,48 @@
+import numpy as np, time, tracemalloc, gc
+from pycleora import SparseMatrix, embed
+from pycleora.algorithms import embed_netmf, embed_deepwalk
+from pycleora.classify import mlp_classify
+from pycleora.metrics import node_classification_scores, cross_validate
+from pycleora.community import detect_communities_louvain
+from pycleora.datasets import load_dataset
+# Passed as the second positional argument to every embed() call in this
+# script; presumably the embedding dimensionality (TODO confirm in pycleora).
+DIM = 256
+
+def m(fn, g):
+    """Call ``fn(g)`` once, returning its result, elapsed time and peak memory.
+
+    Args:
+        fn: Callable invoked with ``g`` as its only argument.
+        g: Object forwarded to ``fn`` unchanged.
+
+    Returns:
+        ``(result, seconds, peak_mib)`` where ``peak_mib`` is the tracemalloc
+        peak in bytes divided by 1024 twice.
+
+    Raises:
+        Whatever ``fn`` raises; tracing is stopped before it propagates.
+
+    NOTE(review): elapsed time includes tracemalloc's own overhead, and
+    allocations made outside Python's allocators may not be traced.
+    """
+    gc.collect()
+    tracemalloc.start()
+    try:
+        # Monotonic clock: immune to wall-clock adjustments during the run.
+        t0 = time.perf_counter()
+        r = fn(g)
+        t = time.perf_counter() - t0
+        _, p = tracemalloc.get_traced_memory()
+    finally:
+        # Stop tracing on every path so a failed run cannot leave a session
+        # open and contaminate the peak of a later measurement.
+        tracemalloc.stop()
+    return r, t, p / 1024 / 1024
+
+# Build the Facebook graph once; Louvain communities on it serve as labels.
+ds = load_dataset("facebook")
+g = SparseMatrix.from_iterator(iter(ds["edges"]), ds["columns"])
+lb = detect_communities_louvain(g)
+
+# 1) Re-embed three times with identical settings and a fixed scoring seed to
+#    see how much the scores, time and memory vary between runs.
+print("=== CLEORA(w,16it) - 3 RUNS ===")
+for run in range(3):
+    emb, t, mem = m(lambda graph: embed(graph, DIM, 16, whiten=True), g)
+    nc = node_classification_scores(g, emb, lb, seed=42)
+    print(f"Run {run+1}: NC={nc['accuracy']:.4f} F1={nc['macro_f1']:.4f} t={t:.3f}s mem={mem:.1f}MB")
+    gc.collect()
+
+# 2) Hold one embedding fixed and vary only the MLP seed.
+print("\n=== CLEORA(w,16it) MLP - 3 seeds ===")
+emb16, t, mem = m(lambda graph: embed(graph, DIM, 16, whiten=True), g)
+for seed in (42, 123, 777):
+    mlp = mlp_classify(
+        g,
+        emb16,
+        lb,
+        hidden_dim=256,
+        num_epochs=400,
+        learning_rate=0.005,
+        seed=seed,
+    )
+    print(f"Seed {seed}: MLP_acc={mlp['accuracy']:.4f} MLP_f1={mlp['macro_f1']:.4f}")
+
+# 3) 5-fold cross-validation on that same embedding.
+print("\n=== CLEORA(w,16it) CROSS-VALIDATION ===")
+cv = cross_validate(g, emb16, lb, k_folds=5, seed=42)
+print(f"CV Acc: {cv['mean_accuracy']:.4f} ± {cv['std_accuracy']:.4f}")
+print(f"CV F1:  {cv['mean_macro_f1']:.4f} ± {cv['std_macro_f1']:.4f}")
+
+# 4) Cleora(w,8it) node-classification accuracy under different scoring seeds;
+#    the embedding is recomputed for each seed.
+#    NOTE(review): the banner mentions NetMF, but no NetMF embedding is
+#    computed in this section -- only the Cleora side is printed.
+print("\n=== STABILITY: Cleora(w,8it) vs NetMF NC ===")
+for seed in (42, 0, 123):
+    emb8, _, _ = m(lambda graph: embed(graph, DIM, 8, whiten=True), g)
+    nc8 = node_classification_scores(g, emb8, lb, seed=seed)
+    print(f"Cleora(w,8it) seed={seed}: NC={nc8['accuracy']:.4f}")
+
+print("\nDONE!")