Skip to content

Commit f5a54e2

Browse files
committed
Separate the handling of aggregation filters from real-value-based filters on overview plots
1 parent 4892780 commit f5a54e2

1 file changed

Lines changed: 50 additions & 21 deletions

File tree

survey_dashboard/core/data.py

Lines changed: 50 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,22 @@ def map_question_to_qkey(self, question: str, lang: str = LANGUAGE) -> list:
9696

9797
def select_data(self, question, data_filters, data_filters_method, filter_by=FILTER_BY):
9898
"""Select and transform data for visualization"""
99+
100+
def get_real_research_areas(data_filters):
    """
    Partition the selected filter values into real and pseudo entries.

    The labels "All" and "Cum. Sum" are treated as pseudo-categories;
    every other value is returned as a real research area. The relative
    order of the values within each group is preserved.

    Args:
        data_filters: List of selected filter values.

    Returns:
        tuple: (real_research_areas, pseudo_categories), both lists.
    """
    aggregate_labels = {"All", "Cum. Sum"}

    def is_pseudo(value):
        return value in aggregate_labels

    # Two separate passes, real values first, then pseudo-categories.
    selected_real = [value for value in data_filters if not is_pseudo(value)]
    selected_pseudo = list(filter(is_pseudo, data_filters))
    return selected_real, selected_pseudo
114+
99115
print(question)
100116
q_index = self.map_question_to_qkey(question)
101117
print(q_index)
@@ -137,6 +153,9 @@ def select_data(self, question, data_filters, data_filters_method, filter_by=FIL
137153
df, q_index_clean, display_dict=HCS_MCSUBQUESTIONS_FLATTENED
138154
)
139155

156+
# Separate real research areas from pseudo-categories
157+
real_research_areas, pseudo_categories = get_real_research_areas(data_filters)
158+
140159
# Handle data filtering based on what's selected
141160
if "All" in data_filters and len(data_filters) == 1:
142161
# Only "All" is selected - use the aggregated data from all research areas
@@ -152,13 +171,18 @@ def select_data(self, question, data_filters, data_filters_method, filter_by=FIL
152171
data = data_all
153172
y_keys = ["All"] + [data_all.get(q_index_clean[0], [])]
154173
else:
155-
# Specific research areas are selected - filter the data
156-
exclude = []
157-
for field in RESEARCH_FIELDS:
158-
if field not in data_filters:
159-
exclude.append(field)
160-
for filter_key in exclude:
161-
df = df[df[filter_by] != filter_key]
174+
# Handle filtering based on real research areas (not pseudo-categories)
175+
if real_research_areas:
176+
# Real research areas are selected - filter the data to include only those
177+
exclude = []
178+
for field in RESEARCH_FIELDS:
179+
# Only exclude fields that are real research areas (not pseudo-categories)
180+
if field not in {"All", "Cum. Sum"} and field not in real_research_areas:
181+
exclude.append(field)
182+
183+
for filter_key in exclude:
184+
df = df[df[filter_by] != filter_key]
185+
# else: Only pseudo-categories selected - df remains unfiltered
162186

163187
# Special case: if we're querying the research area question itself,
164188
# we need different logic to avoid double-grouping by research areas
@@ -168,24 +192,29 @@ def select_data(self, question, data_filters, data_filters_method, filter_by=FIL
168192
# Use the full research area list as x_value for proper positioning
169193
data["x_value"] = data_all.get(q_index_0, [])
170194

171-
# Create arrays for each selected research area
172-
for area in data_filters:
173-
if area != "All":
174-
# Create array with value at correct position, zeros elsewhere
175-
area_array = np.zeros(len(data["x_value"]))
176-
if area in data["x_value"]:
177-
position = data["x_value"].index(area)
178-
area_count = len(df[df[filter_by] == area])
179-
area_array[position] = area_count
180-
data[area] = area_array
181-
182-
y_keys = [area for area in data_filters if area != "All"]
195+
# Create arrays for each real research area selected
196+
for area in real_research_areas:
197+
# Create array with value at correct position, zeros elsewhere
198+
area_array = np.zeros(len(data["x_value"]))
199+
if area in data["x_value"]:
200+
position = data["x_value"].index(area)
201+
area_count = len(df[df[filter_by] == area])
202+
area_array[position] = area_count
203+
data[area] = area_array
204+
205+
# Handle "Cum. Sum" pseudo-category if selected
206+
if "Cum. Sum" in pseudo_categories:
207+
# Add cumulative sum data (this will be the total across all areas)
208+
data["Cum. Sum"] = data_all.get("All", [])
209+
210+
# Set y_keys to include both real areas and pseudo-categories
211+
y_keys = real_research_areas + [cat for cat in pseudo_categories if cat != "All"]
183212
else:
184213
# Normal case: use prepare_data_research_field for other questions
185214
data, y_keys = prepare_data_research_field(df, q_index)
186215

187-
# Add "All" data if it's selected in the filters
188-
if "All" in data_filters:
216+
# Add "All" data if it's selected in the pseudo-categories
217+
if "All" in pseudo_categories:
189218
all_data = data_all.get("All", [])
190219

191220
# Special handling for research area question

0 commit comments

Comments
 (0)