Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 99 additions & 0 deletions src/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
from fastapi import FastAPI, HTTPException, Query
from data_loader import load_data_msci
from constraints import Constraints
from optimization_data import OptimizationData
from optimization import LeastSquares
import pandas as pd

# ASGI application object; the route decorators below register handlers on it.
# The title, description and version are passed to FastAPI as API metadata.
app = FastAPI(
title="PorQua GSoC API",
description="Interactive Portfolio Optimization API for GeomScale",
version="0.1.0",
)


@app.get("/")
def home():
    """Report that the service is up and list the routes a client can call."""
    available_routes = ["/optimize", "/docs"]
    return dict(
        status="online",
        message="PorQua Financial Engine is ready.",
        endpoints=available_routes,
    )


@app.get("/optimize")
def optimize_portfolio(
    n_assets: int = Query(5, ge=1),
    method: str = Query("least_squares"),
):
    """
    Run a single-step portfolio optimization and return weights.

    For now, this uses a Least Squares index replication model on MSCI data.

    Parameters
    ----------
    n_assets:
        Number of assets to optimize over; the first ``n_assets`` columns of
        the loaded return series are used. Must be at least 1.
    method:
        Optimization method. Only ``"least_squares"`` is accepted.

    Returns
    -------
    dict
        ``status``, ``method``, the selected ``universe``, the ``weights``
        keyed by asset, and ``metrics`` containing the sum of the weights.

    Raises
    ------
    HTTPException
        400 if ``method`` is unsupported or ``n_assets`` exceeds the number
        of available assets; 500 for any other failure during loading or
        solving.
    """
    try:
        if method != "least_squares":
            raise HTTPException(
                status_code=400,
                detail="Currently only method='least_squares' is supported.",
            )

        # 1. Load data (country indices + world benchmark)
        data = load_data_msci()
        return_series = data["return_series"]
        bm_series = data["bm_series"]

        n_available = return_series.shape[1]
        if n_assets > n_available:
            raise HTTPException(
                status_code=400,
                detail=f"Requested n_assets={n_assets} but only "
                f"{n_available} assets are available.",
            )

        # Restrict to the first n_assets for this example
        selected_assets = return_series.columns[:n_assets]
        X = return_series[selected_assets]

        # 2. Build simple LongOnly, fully-invested constraints
        constraints = Constraints(selection=selected_assets)
        constraints.add_budget(rhs=1.0, sense="=")
        constraints.add_box(box_type="LongOnly")

        # 3. Build optimization data and model
        opt_data = OptimizationData(
            return_series=X,
            bm_series=bm_series,
            align=True,
        )

        optim = LeastSquares(
            solver_name="cvxopt",
            sparse=True,
            verbose=False,
            constraints=constraints,
        )
        optim.set_objective(opt_data)
        optim.solve()

        # Align the solver output to the selected assets and coerce to float
        # once, then derive BOTH the returned weights and the metric from that
        # same series. Returning the raw solver object could disagree with
        # sum_weights and may carry values that are not JSON-native.
        weights_series = pd.Series(
            optim.results["weights"], index=selected_assets, dtype=float
        )

        return {
            "status": "success",
            "method": method,
            "universe": {
                "n_assets": len(selected_assets),
                "assets": list(selected_assets),
            },
            "weights": weights_series.to_dict(),
            "metrics": {
                "sum_weights": float(weights_series.sum()),
            },
        }

    except HTTPException:
        # Re-raise FastAPI HTTP errors unchanged
        raise
    except Exception as e:
        # Keep the original exception chained so server-side tracebacks show
        # the real cause behind the generic 500 response.
        raise HTTPException(status_code=500, detail=str(e)) from e
22 changes: 12 additions & 10 deletions src/data_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
Licensed under GNU LGPL.3, see LICENCE file
'''



import os
from typing import Optional, Union, Any
import pandas as pd
def _read_msci_csv(path: str, filename: str) -> pd.DataFrame:
    '''Read one comma-separated MSCI file whose first column holds dd-mm-YYYY dates.'''
    frame = pd.read_csv(os.path.join(path, filename),
                        sep=',',
                        index_col=0,
                        header=0,
                        # Leave the index as text here: automatic date parsing
                        # may read an ambiguous day-first date as month-first,
                        # and the explicit conversion below would then be a
                        # no-op on the already-parsed (wrong) dates.
                        parse_dates=False)
    frame.index = pd.to_datetime(frame.index, format='%d-%m-%Y')
    return frame


def load_data_msci(path: Optional[str] = None, n: int = 24) -> dict[str, pd.DataFrame]:
    '''Loads MSCI daily returns data from 1999-01-01 to 2023-04-18

    Parameters
    ----------
    path:
        Directory containing 'msci_country_indices.csv' and 'NDDLWI.csv'.
        Defaults to the 'data' folder under the current working directory.
        A trailing path separator is optional.
    n:
        Number of leading columns of the country index file to keep.

    Returns
    -------
    dict with two entries: 'return_series' (the first n country index
    columns) and 'bm_series' (the NDDLWI world index), both indexed by date.
    '''

    path = os.path.join(os.getcwd(), f'data{os.sep}') if path is None else path

    # --- FILE 1: MSCI Country Indices ---
    df = _read_msci_csv(path, 'msci_country_indices.csv')
    series_id = df.columns[0:n]
    X = df[series_id]

    # --- FILE 2: World Index (NDDLWI) ---
    # Joined with os.path.join like the first file, so a path without a
    # trailing separator resolves correctly.
    y = _read_msci_csv(path, 'NDDLWI.csv')

    return {'return_series': X, 'bm_series': y}
66 changes: 66 additions & 0 deletions test/tests_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import os
import sys
import unittest
from unittest.mock import patch, MagicMock

from fastapi.testclient import TestClient

# Ensure 'src' modules (like data_loader) are importable when src.api is loaded
sys.path.insert(1, os.path.join(os.path.dirname(__file__), "..", "src"))

from src.api import app


# Shared test client bound to the FastAPI app; used by every test in this module.
client = TestClient(app)


class TestAPI(unittest.TestCase):
    """Endpoint tests for the API, run through the shared test client."""

    def test_home_endpoint(self):
        """The root route reports 'online' and advertises /optimize."""
        response = client.get("/")
        self.assertEqual(response.status_code, 200)
        payload = response.json()
        self.assertEqual(payload["status"], "online")
        self.assertIn("/optimize", payload["endpoints"])

    def test_optimize_rejects_unknown_method(self):
        """An unsupported method is rejected with a 400 before any data is loaded."""
        response = client.get("/optimize?n_assets=3&method=not_a_method")
        self.assertEqual(response.status_code, 400)
        self.assertIn("least_squares", response.json()["detail"])

    # Decorators apply bottom-up, so the load_data_msci mock is the first
    # injected argument and the LeastSquares mock the second.
    @patch("src.api.LeastSquares")
    @patch("src.api.load_data_msci")
    def test_optimize_least_squares(self, mock_load_data_msci, mock_least_squares):
        """The happy path returns the universe, the weights and their sum."""
        # Arrange: mock data loader to avoid filesystem dependency
        import pandas as pd

        return_series = pd.DataFrame(
            {"US": [0.01, 0.02], "UK": [0.03, 0.04], "DE": [0.01, -0.01]}
        )
        bm_series = pd.DataFrame({"NDDLWI": [0.02, 0.01]})
        mock_load_data_msci.return_value = {
            "return_series": return_series,
            "bm_series": bm_series,
        }

        # Arrange: mock optimizer to avoid calling qpsolvers
        expected_weights = {"US": 0.6, "UK": 0.3, "DE": 0.1}
        mock_instance = MagicMock()
        mock_instance.results = {
            "weights": dict(expected_weights),
            "status": True,
        }
        mock_least_squares.return_value = mock_instance

        # Act
        response = client.get("/optimize?n_assets=3&method=least_squares")

        # Assert
        self.assertEqual(response.status_code, 200)
        payload = response.json()

        self.assertEqual(payload["status"], "success")
        self.assertEqual(payload["method"], "least_squares")
        self.assertEqual(payload["universe"]["n_assets"], 3)
        self.assertCountEqual(
            payload["universe"]["assets"], ["US", "UK", "DE"]
        )
        self.assertAlmostEqual(payload["metrics"]["sum_weights"], 1.0, places=6)

        # The weights themselves are the main output of the endpoint, so check
        # that each asset's weight comes back as produced by the optimizer.
        self.assertCountEqual(payload["weights"].keys(), expected_weights.keys())
        for asset, weight in expected_weights.items():
            self.assertAlmostEqual(payload["weights"][asset], weight, places=9)

        # The endpoint must actually build and run the (mocked) optimizer.
        mock_least_squares.assert_called_once()
        mock_instance.set_objective.assert_called_once()
        mock_instance.solve.assert_called_once()


if __name__ == "__main__":
unittest.main()

45 changes: 45 additions & 0 deletions test/tests_data_loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import unittest
import pandas as pd
from unittest.mock import patch

from src.data_loader import load_data_msci # Corrected import statement

class TestDataLoader(unittest.TestCase):
    """Unit tests for the MSCI data loader, isolated from the filesystem."""

    @patch('src.data_loader.pd.read_csv')
    def test_load_data_msci(self, mock_read_csv):
        """
        load_data_msci returns the frames produced by read_csv.

        read_csv is mocked, so no data folder is needed and the path
        argument passed to the loader is never actually read.
        """
        date_labels = ['2023-01-01', '2023-01-02']

        # Stand-ins for the two CSV files: country indices, then world index.
        country_frame = pd.DataFrame(
            {'US': [0.01, 0.02], 'UK': [0.03, 0.04]},
            index=pd.to_datetime(date_labels),
        )
        world_frame = pd.DataFrame(
            {'NDDLWI': [0.05, 0.06]},
            index=pd.to_datetime(date_labels),
        )

        # Successive read_csv calls hand back the frames in this order.
        mock_read_csv.side_effect = [country_frame, world_frame]

        loaded = load_data_msci(path='dummy/path/')

        self.assertIsNotNone(loaded)
        self.assertIn('return_series', loaded)
        self.assertIn('bm_series', loaded)

        # Each entry must match the frame fed through the mock.
        expected_by_key = (
            ('return_series', country_frame),
            ('bm_series', world_frame),
        )
        for key, expected in expected_by_key:
            pd.testing.assert_frame_equal(loaded[key], expected)


if __name__ == '__main__':
unittest.main()