Skip to content

Commit 845c91b

Browse files
committed
~3x speed up by parallelizing radiation damping + fix run_hydrochrono comparison plots
- Refactor and parallelize radiation damping computation - ~3x speed up - Add compare_template.py for various layout options - Fix multi-signal adapter path
1 parent 98dfcf2 commit 845c91b

2 files changed

Lines changed: 174 additions & 59 deletions

File tree

src/hydro_forces.cpp

Lines changed: 135 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@
2626
#include <vector>
2727
#include <chrono>
2828

29+
#ifdef _OPENMP
30+
#include <omp.h>
31+
#endif
32+
2933
const int kDofPerBody = 6;
3034
const int kDofLinOrRot = 3;
3135

@@ -312,6 +316,66 @@ std::vector<double> TestHydro::ComputeForceHydrostatics() {
312316
return force_hydrostatic_;
313317
}
314318

319+
// Internal helpers (file-local)
320+
namespace {
321+
// Remove history entries older than history_min_time.
322+
inline void PruneHistory(std::vector<double>& time_history,
323+
std::vector<std::vector<std::vector<double>>>& velocity_history,
324+
int num_bodies,
325+
double history_min_time) {
326+
while (time_history.size() > 1 && time_history[time_history.size() - 2] < history_min_time) {
327+
time_history.pop_back();
328+
for (int b = 0; b < num_bodies; ++b) {
329+
auto& velocity_history_body = velocity_history[b];
330+
if (!velocity_history_body.empty()) {
331+
velocity_history_body.pop_back();
332+
}
333+
}
334+
}
335+
}
336+
337+
// Interpolate 6-DOF velocities at a query time from two bracketing samples.
338+
inline void InterpolateVelocity6D(const std::vector<std::vector<double>>& velocity_history_body,
339+
size_t newer_index,
340+
double query_time,
341+
double older_time,
342+
double newer_time,
343+
double out_velocity[kDofPerBody]) {
344+
if (query_time == older_time) {
345+
const auto& older_velocity = velocity_history_body[newer_index + 1];
346+
for (int dof = 0; dof < kDofPerBody; ++dof) out_velocity[dof] = older_velocity[dof];
347+
return;
348+
}
349+
if (query_time == newer_time) {
350+
const auto& newer_velocity = velocity_history_body[newer_index];
351+
for (int dof = 0; dof < kDofPerBody; ++dof) out_velocity[dof] = newer_velocity[dof];
352+
return;
353+
}
354+
if (query_time > older_time && query_time < newer_time) {
355+
const double time_delta = (newer_time - older_time);
356+
const double weight_older = (time_delta != 0.0) ? ((newer_time - query_time) / time_delta) : 0.0;
357+
const double weight_newer = 1.0 - weight_older;
358+
const auto& older_velocity = velocity_history_body[newer_index + 1];
359+
const auto& newer_velocity = velocity_history_body[newer_index];
360+
for (int dof = 0; dof < kDofPerBody; ++dof) {
361+
out_velocity[dof] = weight_older * older_velocity[dof] + weight_newer * newer_velocity[dof];
362+
}
363+
return;
364+
}
365+
throw std::runtime_error("Radiation convolution: interpolation error; query_time not bracketed by history.");
366+
}
367+
368+
// Advance index so that time_history[index] >= query_time >= time_history[index+1] (newest first ordering).
369+
inline bool AdvanceToBracket(const std::vector<double>& time_history,
370+
size_t& index,
371+
double query_time) {
372+
while ((index + 1) < time_history.size() && time_history[index + 1] > query_time) {
373+
++index;
374+
}
375+
return ((index + 1) < time_history.size());
376+
}
377+
} // namespace
378+
315379
std::vector<double> TestHydro::ComputeForceRadiationDampingConv() {
316380
auto __t0 = std::chrono::steady_clock::now();
317381
const int rirf_steps = file_info_.GetRIRFDims(2);
@@ -347,17 +411,7 @@ std::vector<double> TestHydro::ComputeForceRadiationDampingConv() {
347411
}
348412

349413
// Prune history older than the max RIRF time span
350-
if (time_history_.size() > 1) {
351-
while (time_history_.size() > 1 && time_history_[time_history_.size() - 2] < history_min_time) {
352-
time_history_.pop_back();
353-
for (int b = 0; b < num_bodies_; ++b) {
354-
auto& velocity_history_body = velocity_history_[b];
355-
if (!velocity_history_body.empty()) {
356-
velocity_history_body.pop_back();
357-
}
358-
}
359-
}
360-
}
414+
PruneHistory(time_history_, velocity_history_, num_bodies_, history_min_time);
361415

362416
// Nothing to convolve with if we don't yet have at least 2 time points
363417
if (time_history_.size() <= 1) {
@@ -368,65 +422,90 @@ std::vector<double> TestHydro::ComputeForceRadiationDampingConv() {
368422

369423
// Walk through RIRF steps and accumulate convolution
370424
size_t history_index = 0; // index into descending time_history_ (front is most recent)
425+
426+
#ifdef _OPENMP
427+
const int num_threads = omp_get_max_threads();
428+
std::vector<std::vector<double>> thread_locals(num_threads, std::vector<double>(total_dofs, 0.0));
429+
430+
#pragma omp parallel
431+
{
432+
const int tid = omp_get_thread_num();
433+
auto& local_out = thread_locals[tid];
434+
435+
size_t history_index_local = history_index;
436+
#pragma omp for schedule(static)
437+
for (int step = 0; step < rirf_steps; ++step) {
438+
const double rirf_query_time = simulation_time - rirf_time_vector[step];
439+
440+
size_t time_index = history_index_local;
441+
if (!AdvanceToBracket(time_history_, time_index, rirf_query_time)) {
442+
continue; // not enough older history to interpolate
443+
}
444+
history_index_local = time_index;
445+
446+
const double newer_time = time_history_[history_index_local];
447+
const double older_time = time_history_[history_index_local + 1];
448+
449+
for (int body_index = 0; body_index < num_bodies_; ++body_index) {
450+
const auto& velocity_history_body = velocity_history_[body_index];
451+
if (velocity_history_body.size() <= history_index_local) {
452+
continue; // inconsistent; should not happen in normal flow
453+
}
454+
455+
double interpolated_velocity_dof[kDofPerBody];
456+
InterpolateVelocity6D(velocity_history_body, history_index_local, rirf_query_time,
457+
older_time, newer_time, interpolated_velocity_dof);
458+
459+
const double step_width = rirf_width_vector[step];
460+
if (step_width == 0.0) {
461+
continue;
462+
}
463+
const int body_col_offset = body_index * kDofPerBody;
464+
for (int dof = 0; dof < kDofPerBody; ++dof) {
465+
const int col = body_col_offset + dof;
466+
const double contribution_scale = interpolated_velocity_dof[dof] * step_width;
467+
if (contribution_scale == 0.0) {
468+
continue;
469+
}
470+
for (int row = 0; row < total_dofs; ++row) {
471+
local_out[row] += GetRIRFval(row, col, step) * contribution_scale;
472+
}
473+
}
474+
}
475+
}
476+
}
477+
478+
// Deterministic combine of thread-local accumulators
479+
for (int t = 0; t < num_threads; ++t) {
480+
const auto& local = thread_locals[t];
481+
for (int row = 0; row < total_dofs; ++row) {
482+
force_radiation_damping_[row] += local[row];
483+
}
484+
}
485+
#else
486+
double* force_radiation_out = force_radiation_damping_.data();
371487
for (int step = 0; step < rirf_steps; ++step) {
372488
const double rirf_query_time = simulation_time - rirf_time_vector[step];
373489

374-
// Advance history_index until [history_index, history_index+1] brackets rirf_query_time, or we run out
375-
while ((history_index + 1) < time_history_.size() && time_history_[history_index + 1] > rirf_query_time) {
376-
++history_index;
377-
}
378-
if ((history_index + 1) >= time_history_.size()) {
490+
size_t time_index = history_index;
491+
if (!AdvanceToBracket(time_history_, time_index, rirf_query_time)) {
379492
break; // not enough older history to interpolate
380493
}
494+
history_index = time_index;
381495

382496
const double newer_time = time_history_[history_index];
383497
const double older_time = time_history_[history_index + 1];
384498

385-
// For each body, interpolate 6-DOF velocity at rirf_query_time and apply convolution
386499
for (int body_index = 0; body_index < num_bodies_; ++body_index) {
387-
auto& velocity_history_body = velocity_history_[body_index];
388-
389-
// Ensure history is consistent across bodies
500+
const auto& velocity_history_body = velocity_history_[body_index];
390501
if (velocity_history_body.size() <= history_index) {
391502
continue; // skip if inconsistent; should not happen in normal flow
392503
}
393504

394-
// Interpolate velocities
395505
double interpolated_velocity_dof[kDofPerBody];
396-
if (rirf_query_time == older_time) {
397-
const auto& older_velocity = velocity_history_body[history_index + 1];
398-
interpolated_velocity_dof[0] = older_velocity[0];
399-
interpolated_velocity_dof[1] = older_velocity[1];
400-
interpolated_velocity_dof[2] = older_velocity[2];
401-
interpolated_velocity_dof[3] = older_velocity[3];
402-
interpolated_velocity_dof[4] = older_velocity[4];
403-
interpolated_velocity_dof[5] = older_velocity[5];
404-
} else if (rirf_query_time == newer_time) {
405-
const auto& newer_velocity = velocity_history_body[history_index];
406-
interpolated_velocity_dof[0] = newer_velocity[0];
407-
interpolated_velocity_dof[1] = newer_velocity[1];
408-
interpolated_velocity_dof[2] = newer_velocity[2];
409-
interpolated_velocity_dof[3] = newer_velocity[3];
410-
interpolated_velocity_dof[4] = newer_velocity[4];
411-
interpolated_velocity_dof[5] = newer_velocity[5];
412-
} else if (rirf_query_time > older_time && rirf_query_time < newer_time) {
413-
const double time_delta = (newer_time - older_time);
414-
const double weight_older = (time_delta != 0.0) ? ((newer_time - rirf_query_time) / time_delta) : 0.0;
415-
const double weight_newer = 1.0 - weight_older;
416-
const auto& older_velocity = velocity_history_body[history_index + 1];
417-
const auto& newer_velocity = velocity_history_body[history_index];
418-
interpolated_velocity_dof[0] = weight_older * older_velocity[0] + weight_newer * newer_velocity[0];
419-
interpolated_velocity_dof[1] = weight_older * older_velocity[1] + weight_newer * newer_velocity[1];
420-
interpolated_velocity_dof[2] = weight_older * older_velocity[2] + weight_newer * newer_velocity[2];
421-
interpolated_velocity_dof[3] = weight_older * older_velocity[3] + weight_newer * newer_velocity[3];
422-
interpolated_velocity_dof[4] = weight_older * older_velocity[4] + weight_newer * newer_velocity[4];
423-
interpolated_velocity_dof[5] = weight_older * older_velocity[5] + weight_newer * newer_velocity[5];
424-
} else {
425-
throw std::runtime_error(
426-
"Radiation convolution: interpolation error; rirf_query_time not bracketed by adjacent history.");
427-
}
506+
InterpolateVelocity6D(velocity_history_body, history_index, rirf_query_time,
507+
older_time, newer_time, interpolated_velocity_dof);
428508

429-
// Apply convolution: for each DOF column, add RIRF[:, col, step] * vel[dof] * width
430509
const double step_width = rirf_width_vector[step];
431510
const int body_col_offset = body_index * kDofPerBody;
432511
for (int dof = 0; dof < kDofPerBody; ++dof) {
@@ -436,11 +515,12 @@ std::vector<double> TestHydro::ComputeForceRadiationDampingConv() {
436515
continue;
437516
}
438517
for (int row = 0; row < total_dofs; ++row) {
439-
force_radiation_damping_[row] += GetRIRFval(row, col, step) * contribution_scale;
518+
force_radiation_out[row] += GetRIRFval(row, col, step) * contribution_scale;
440519
}
441520
}
442521
}
443522
}
523+
#endif
444524

445525
profile_stats_.radiation_seconds += std::chrono::duration_cast<std::chrono::duration<double>>(std::chrono::steady_clock::now() - __t0).count();
446526
profile_stats_.radiation_calls++;

tests/regression/run_hydrochrono/run_tests.py

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,41 @@
1515
THIS = _here.parent
1616

1717

18+
def _resolve_compare_template_dir() -> Path | None:
19+
"""Locate a directory containing compare_template.py across source/install layouts.
20+
21+
Search order (first match wins):
22+
- tests/regression (source tree layout)
23+
- tests/regression/utilities (alternative location)
24+
- siblings relative to installed layout: tests/regression, tests/regression/utilities
25+
- walk up a few parents and probe known subpaths
26+
"""
27+
candidates: list[Path] = []
28+
# Local candidates near this script
29+
candidates.append(THIS.parent / "regression") # tests/regression (source layout)
30+
candidates.append(THIS.parent / "regression" / "utilities")
31+
candidates.append(THIS) # tests/run_hydrochrono sibling 'compare_template.py' (rare)
32+
candidates.append(THIS.parent) # tests/
33+
# Root-based candidates
34+
candidates.append(ROOT / "tests" / "regression")
35+
candidates.append(ROOT / "tests" / "regression" / "utilities")
36+
37+
# Walk up a few levels and try common subpaths
38+
cur = _here
39+
for _ in range(6):
40+
cur = cur.parent
41+
candidates.append(cur / "tests" / "regression")
42+
candidates.append(cur / "tests" / "regression" / "utilities")
43+
44+
for c in candidates:
45+
try:
46+
if (c / "compare_template.py").exists():
47+
return c
48+
except Exception:
49+
pass
50+
return None
51+
52+
1853
def default_exe() -> str:
1954
"""Best-effort discovery of run_hydrochrono.exe in build and install layouts."""
2055
# Allow override via environment
@@ -92,9 +127,9 @@ def run_case(exe: str, model: str, test_type: str, tol: float, update_baseline:
92127
assert spec and spec.loader
93128
adapter = importlib.util.module_from_spec(spec)
94129
spec.loader.exec_module(adapter) # type: ignore
95-
# import plotting template from tests/regression
96-
regression_dir = THIS.parent
97-
if str(regression_dir) not in sys.path:
130+
# import plotting template from tests/regression (robust resolution)
131+
regression_dir = _resolve_compare_template_dir()
132+
if regression_dir is not None and str(regression_dir) not in sys.path:
98133
sys.path.insert(0, str(regression_dir))
99134
from compare_template import create_comparison_plot # type: ignore
100135
# helper
@@ -135,7 +170,7 @@ def rms_relative_error(ref_arr: np.ndarray, pred_arr: np.ndarray) -> float:
135170
)
136171
if result != "PASS":
137172
status = 1
138-
return status
173+
return status
139174
if single is not None:
140175
t_sim, y_sim, y_label = single(outputs_h5)
141176
if ref:

0 commit comments

Comments
 (0)