|
17 | 17 | from pathlib import Path |
18 | 18 | import pandas as pd |
19 | 19 | import panel as pn |
| 20 | +import numpy as np |
20 | 21 | from bokeh.palettes import Category20 |
21 | 22 | from bokeh.models import ColumnDataSource, Div |
22 | 23 | from jinja2 import Environment, FileSystemLoader |
@@ -312,16 +313,50 @@ def select_data(question, data_filters, data_filters_method, filter_by=FILTER_BY |
312 | 313 | all_data = data_all.get("All", []) |
313 | 314 | print('Adding All data, length:', len(all_data), 'x_value length:', len(data["x_value"])) |
314 | 315 |
|
315 | | - # Simple approach: truncate "All" data to match x_value length |
316 | | - if len(all_data) > len(data["x_value"]): |
317 | | - data["All"] = all_data[:len(data["x_value"])] |
318 | | - print('All data truncated to match x_value length') |
319 | | - else: |
320 | | - # If "All" data is shorter, just use what we have |
| 316 | + # OPTION 2: When "All" + specific filters are selected, expand to show all research areas |
| 317 | + if "researchArea" in data: |
| 318 | + # Use full "All" data and expand x_value to all research areas |
321 | 319 | data["All"] = all_data |
322 | | - print('All data added (shorter than x_value)') |
| 320 | + data["x_value"] = data_all["researchArea"] |
| 321 | + |
| 322 | + # Expand specific research area data to match full x_value length |
| 323 | + for key in data_filters: |
| 324 | + if key != "All" and key in data: |
| 325 | + # Find position of this research area in full x_value |
| 326 | + try: |
| 327 | + position = data["x_value"].index(key) |
| 328 | + # Create array with value at correct position, zeros elsewhere |
| 329 | + new_array = np.zeros(len(data["x_value"]), dtype=data[key].dtype) |
| 330 | + new_array[position] = data[key][0] # Use first (and only) value |
| 331 | + data[key] = new_array |
| 332 | + except ValueError: |
| 333 | + # If not found, just pad with zeros |
| 334 | + padding = [0] * (len(data["x_value"]) - len(data[key])) |
| 335 | + data[key] = list(data[key]) + padding |
| 336 | + |
| 337 | + print('Research area: Expanded to show all research areas with aligned data') |
| 338 | + else: |
| 339 | + # For other questions: truncate to match x_value length |
| 340 | + if len(all_data) > len(data["x_value"]): |
| 341 | + data["All"] = all_data[:len(data["x_value"])] |
| 342 | + |
| 343 | + else: |
| 344 | + data["All"] = all_data |
| 345 | + |
| 346 | + |
| 347 | + print('data after adding All:', data) |
323 | 348 |
|
324 | | - print('data after adding All:', data) |
| 349 | + # Also expand Cum. Sum to match the new x_value length |
| 350 | + if "Cum. Sum" in data: |
| 351 | + cum_sum_data = data["Cum. Sum"] |
| 352 | + if len(cum_sum_data) < len(data["x_value"]): |
| 353 | + # Pad with zeros to match new length |
| 354 | + if isinstance(cum_sum_data, np.ndarray): |
| 355 | + padding = np.zeros(len(data["x_value"]) - len(cum_sum_data), dtype=cum_sum_data.dtype) |
| 356 | + data["Cum. Sum"] = np.concatenate([cum_sum_data, padding]) |
| 357 | + else: |
| 358 | + padding = [0] * (len(data["x_value"]) - len(cum_sum_data)) |
| 359 | + data["Cum. Sum"] = list(cum_sum_data) + padding |
325 | 360 |
|
326 | 361 | # print(data) |
327 | 362 | # We create two ColumnDataSources, because they have to be n*n and |
|
0 commit comments