From 5868e1370f4bade97af6abd66b1598d76b1f1d24 Mon Sep 17 00:00:00 2001 From: suyash469 Date: Tue, 3 Feb 2026 23:57:28 +0530 Subject: [PATCH 1/2] feat: Improve data loading and add unit tests for data_loader --- src/data_loader.py | 22 ++++++++++++---------- test/tests_data_loader.py | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 10 deletions(-) create mode 100644 test/tests_data_loader.py diff --git a/src/data_loader.py b/src/data_loader.py index 100aa05..0859854 100644 --- a/src/data_loader.py +++ b/src/data_loader.py @@ -7,8 +7,6 @@ Licensed under GNU LGPL.3, see LICENCE file ''' - - import os from typing import Optional, Union, Any import pandas as pd @@ -35,24 +33,28 @@ def load_data_msci(path: str = None, n: int = 24) -> dict[str, pd.DataFrame]: '''Loads MSCI daily returns data from 1999-01-01 to 2023-04-18''' path = os.path.join(os.getcwd(), f'data{os.sep}') if path is None else path - # Load msci country index return series + + # --- FILE 1: MSCI Country Indices --- df = pd.read_csv(os.path.join(path, 'msci_country_indices.csv'), - sep=';', + sep=',', # FIXED: Separator is comma index_col=0, header=0, parse_dates=True) - df.index = pd.to_datetime(df.index, format='%d/%m/%Y') + + # FIXED: Date format uses dashes + df.index = pd.to_datetime(df.index, format='%d-%m-%Y') + series_id = df.columns[0:n] X = df[series_id] - # Load msci world index return series + # --- FILE 2: World Index (NDDLWI) --- y = pd.read_csv(f'{path}NDDLWI.csv', - sep=';', + sep=',', index_col=0, header=0, parse_dates=True) - y.index = pd.to_datetime(y.index, format='%d/%m/%Y') - - return {'return_series': X, 'bm_series': y} + # FIXED: Date format uses dashes here too (Line 55 fixed) + y.index = pd.to_datetime(y.index, format='%d-%m-%Y') + return {'return_series': X, 'bm_series': y} \ No newline at end of file diff --git a/test/tests_data_loader.py b/test/tests_data_loader.py new file mode 100644 index 0000000..a4fff35 --- /dev/null +++ b/test/tests_data_loader.py @@ -0,0 +1,34 @@ +import sys +import os +import unittest +import pandas as pd +import numpy as np + +sys.path.insert(1, 'src') + +from data_loader import load_data_msci + +class TestDataLoader(unittest.TestCase): + + def setUp(self): + # This method is run before each test + self.data_path = os.path.join(os.getcwd(), 'data/') + + def test_load_data_msci(self): + # Test if data can be loaded without errors + try: + data = load_data_msci(self.data_path) + self.assertIsNotNone(data) + self.assertIsInstance(data, dict) + self.assertIn('return_series', data) + self.assertIn('bm_series', data) + self.assertIsInstance(data['return_series'], pd.DataFrame) + self.assertIsInstance(data['bm_series'], pd.DataFrame) + self.assertFalse(data['return_series'].empty) + self.assertFalse(data['bm_series'].empty) + print("\nSuccessfully loaded MSCI data.") + except Exception as e: + self.fail(f"load_data_msci failed with an error: {e}") + +if __name__ == '__main__': + unittest.main() From add47bbfaeeccc9e16a2cd827cf5b97a81278eab Mon Sep 17 00:00:00 2001 From: suyash469 Date: Wed, 18 Feb 2026 01:12:26 +0530 Subject: [PATCH 2/2] feat(api): add optimize endpoint with tests --- src/api.py | 99 +++++++++++++++++++++++++++++++++++++++ test/tests_api.py | 66 ++++++++++++++++++++++++++ test/tests_data_loader.py | 59 +++++++++++++---------- 3 files changed, 200 insertions(+), 24 deletions(-) create mode 100644 src/api.py create mode 100644 test/tests_api.py diff --git a/src/api.py b/src/api.py new file mode 100644 index 0000000..a323b6e --- /dev/null +++ b/src/api.py @@ -0,0 +1,99 @@ +from fastapi import FastAPI, HTTPException, Query +from data_loader import load_data_msci +from constraints import Constraints +from optimization_data import OptimizationData +from optimization import LeastSquares +import pandas as pd + +app = FastAPI( + title="PorQua GSoC API", + description="Interactive Portfolio Optimization API for GeomScale", + version="0.1.0", +) + + +@app.get("/") +def home(): + return { + "status": "online", + "message": "PorQua Financial Engine is ready.", + "endpoints": ["/optimize", "/docs"], + } + + +@app.get("/optimize") +def optimize_portfolio( + n_assets: int = Query(5, ge=1), + method: str = Query("least_squares"), +): + """ + Run a single-step portfolio optimization and return weights. + + For now, this uses a Least Squares index replication model on MSCI data. + """ + try: + if method != "least_squares": + raise HTTPException( + status_code=400, + detail="Currently only method='least_squares' is supported.", + ) + + # 1. Load data (country indices + world benchmark) + data = load_data_msci() + return_series = data["return_series"] + bm_series = data["bm_series"] + + if n_assets > return_series.shape[1]: + raise HTTPException( + status_code=400, + detail=f"Requested n_assets={n_assets} but only " + f"{return_series.shape[1]} assets are available.", + ) + + # Restrict to the first n_assets for this example + selected_assets = return_series.columns[:n_assets] + X = return_series[selected_assets] + + # 2. Build simple LongOnly, fully-invested constraints + constraints = Constraints(selection=selected_assets) + constraints.add_budget(rhs=1.0, sense="=") + constraints.add_box(box_type="LongOnly") + + # 3. Build optimization data and model + opt_data = OptimizationData( + return_series=X, + bm_series=bm_series, + align=True, + ) + + optim = LeastSquares( + solver_name="cvxopt", + sparse=True, + verbose=False, + constraints=constraints, + ) + optim.set_objective(opt_data) + optim.solve() + + weights = optim.results["weights"] + weights_series = pd.Series(weights, index=selected_assets, dtype=float) + sum_weights = float(weights_series.sum()) + + return { + "status": "success", + "method": method, + "universe": { + "n_assets": len(selected_assets), + "assets": list(selected_assets), + }, + "weights": weights, + "metrics": { + "sum_weights": sum_weights, + }, + } + + except HTTPException: + # Re-raise FastAPI HTTP errors unchanged + raise + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) \ No newline at end of file diff --git a/test/tests_api.py b/test/tests_api.py new file mode 100644 index 0000000..efd47b1 --- /dev/null +++ b/test/tests_api.py @@ -0,0 +1,66 @@ +import os +import sys +import unittest +from unittest.mock import patch, MagicMock + +from fastapi.testclient import TestClient + +# Ensure 'src' modules (like data_loader) are importable when src.api is loaded +sys.path.insert(1, os.path.join(os.path.dirname(__file__), "..", "src")) + +from src.api import app + + +client = TestClient(app) + + +class TestAPI(unittest.TestCase): + def test_home_endpoint(self): + response = client.get("/") + self.assertEqual(response.status_code, 200) + payload = response.json() + self.assertEqual(payload["status"], "online") + self.assertIn("/optimize", payload["endpoints"]) + + @patch("src.api.LeastSquares") + @patch("src.api.load_data_msci") + def test_optimize_least_squares(self, mock_load_data_msci, mock_least_squares): + # Arrange: mock data loader to avoid filesystem dependency + import pandas as pd + + return_series = pd.DataFrame( + {"US": [0.01, 0.02], "UK": [0.03, 0.04], "DE": [0.01, -0.01]} + ) + bm_series = pd.DataFrame({"NDDLWI": [0.02, 0.01]}) + mock_load_data_msci.return_value = { + "return_series": return_series, + "bm_series": bm_series, + } + + # Arrange: mock optimizer to avoid calling qpsolvers + mock_instance = MagicMock() + mock_instance.results = { + "weights": {"US": 0.6, "UK": 0.3, "DE": 0.1}, + "status": True, + } + mock_least_squares.return_value = mock_instance + + # Act + response = client.get("/optimize?n_assets=3&method=least_squares") + + # Assert + self.assertEqual(response.status_code, 200) + payload = response.json() + + self.assertEqual(payload["status"], "success") + self.assertEqual(payload["method"], "least_squares") + self.assertEqual(payload["universe"]["n_assets"], 3) + self.assertCountEqual( + payload["universe"]["assets"], ["US", "UK", "DE"] + ) + self.assertAlmostEqual(payload["metrics"]["sum_weights"], 1.0, places=6) + + +if __name__ == "__main__": + unittest.main() + diff --git a/test/tests_data_loader.py b/test/tests_data_loader.py index a4fff35..ca71a22 100644 --- a/test/tests_data_loader.py +++ b/test/tests_data_loader.py @@ -1,34 +1,45 @@ -import sys -import os import unittest import pandas as pd -import numpy as np +from unittest.mock import patch -sys.path.insert(1, 'src') - -from data_loader import load_data_msci +from src.data_loader import load_data_msci # Corrected import statement class TestDataLoader(unittest.TestCase): - def setUp(self): - # This method is run before each test - self.data_path = os.path.join(os.getcwd(), 'data/') + @patch('src.data_loader.pd.read_csv') # Corrected patch target + def test_load_data_msci(self, mock_read_csv): + """ + Test that load_data_msci works correctly by MOCKING the CSV files. + This ensures the test runs on any machine, even without the data folder. + """ + # 1. Create fake data to simulate the CSV files + # Fake Country Index Data (X) + mock_country_data = pd.DataFrame( + {'US': [0.01, 0.02], 'UK': [0.03, 0.04]}, + index=pd.to_datetime(['2023-01-01', '2023-01-02']) + ) + + # Fake World Index Data (y) + mock_world_data = pd.DataFrame( + {'NDDLWI': [0.05, 0.06]}, + index=pd.to_datetime(['2023-01-01', '2023-01-02']) + ) + + # 2. Tell the mock to return our fake data when read_csv is called + mock_read_csv.side_effect = [mock_country_data, mock_world_data] + + # 3. Run the function (path doesn't matter now because we are mocking!) + result = load_data_msci(path='dummy/path/') - def test_load_data_msci(self): - # Test if data can be loaded without errors - try: - data = load_data_msci(self.data_path) - self.assertIsNotNone(data) - self.assertIsInstance(data, dict) - self.assertIn('return_series', data) - self.assertIn('bm_series', data) - self.assertIsInstance(data['return_series'], pd.DataFrame) - self.assertIsInstance(data['bm_series'], pd.DataFrame) - self.assertFalse(data['return_series'].empty) - self.assertFalse(data['bm_series'].empty) - print("\nSuccessfully loaded MSCI data.") - except Exception as e: - self.fail(f"load_data_msci failed with an error: {e}") + # 4. Verify the results + self.assertIsNotNone(result) + self.assertIn('return_series', result) + self.assertIn('bm_series', result) + + # Check that we got our fake data back + pd.testing.assert_frame_equal(result['return_series'], mock_country_data) + pd.testing.assert_frame_equal(result['bm_series'], mock_world_data) + if __name__ == '__main__': unittest.main()