@@ -96,6 +96,22 @@ def map_question_to_qkey(self, question: str, lang: str = LANGUAGE) -> list:
9696
9797 def select_data (self , question , data_filters , data_filters_method , filter_by = FILTER_BY ):
9898 """Select and transform data for visualization"""
99+
def get_real_research_areas(data_filters):
    """
    Split the selected filter values into real research areas and pseudo-categories.

    "All" and "Cum. Sum" are treated as pseudo-categories; every other
    value is treated as a real research area. Relative order of the
    input is preserved within each returned list.

    Args:
        data_filters: Iterable of selected filter values. It is traversed
            exactly once, so one-shot iterables (generators, iterators)
            are handled correctly.

    Returns:
        tuple: (real_research_areas, pseudo_categories), both lists.
    """
    pseudo_category_names = {"All", "Cum. Sum"}
    real_areas = []
    pseudo_cats = []
    # Partition in a single pass: scanning the input twice would leave the
    # second scan empty whenever the caller hands in an iterator.
    for value in data_filters:
        if value in pseudo_category_names:
            pseudo_cats.append(value)
        else:
            real_areas.append(value)
    return real_areas, pseudo_cats
114+
99115 print (question )
100116 q_index = self .map_question_to_qkey (question )
101117 print (q_index )
@@ -137,6 +153,9 @@ def select_data(self, question, data_filters, data_filters_method, filter_by=FIL
137153 df , q_index_clean , display_dict = HCS_MCSUBQUESTIONS_FLATTENED
138154 )
139155
156+ # Separate real research areas from pseudo-categories
157+ real_research_areas , pseudo_categories = get_real_research_areas (data_filters )
158+
140159 # Handle data filtering based on what's selected
141160 if "All" in data_filters and len (data_filters ) == 1 :
142161 # Only "All" is selected - use the aggregated data from all research areas
@@ -152,13 +171,18 @@ def select_data(self, question, data_filters, data_filters_method, filter_by=FIL
152171 data = data_all
153172 y_keys = ["All" ] + [data_all .get (q_index_clean [0 ], [])]
154173 else :
155- # Specific research areas are selected - filter the data
156- exclude = []
157- for field in RESEARCH_FIELDS :
158- if field not in data_filters :
159- exclude .append (field )
160- for filter_key in exclude :
161- df = df [df [filter_by ] != filter_key ]
174+ # Handle filtering based on real research areas (not pseudo-categories)
175+ if real_research_areas :
176+ # Real research areas are selected - filter the data to include only those
177+ exclude = []
178+ for field in RESEARCH_FIELDS :
179+ # Only exclude fields that are real research areas (not pseudo-categories)
180+ if field not in {"All" , "Cum. Sum" } and field not in real_research_areas :
181+ exclude .append (field )
182+
183+ for filter_key in exclude :
184+ df = df [df [filter_by ] != filter_key ]
185+ # else: Only pseudo-categories selected - df remains unfiltered
162186
163187 # Special case: if we're querying the research area question itself,
164188 # we need different logic to avoid double-grouping by research areas
@@ -168,24 +192,29 @@ def select_data(self, question, data_filters, data_filters_method, filter_by=FIL
168192 # Use the full research area list as x_value for proper positioning
169193 data ["x_value" ] = data_all .get (q_index_0 , [])
170194
171- # Create arrays for each selected research area
172- for area in data_filters :
173- if area != "All" :
174- # Create array with value at correct position, zeros elsewhere
175- area_array = np .zeros (len (data ["x_value" ]))
176- if area in data ["x_value" ]:
177- position = data ["x_value" ].index (area )
178- area_count = len (df [df [filter_by ] == area ])
179- area_array [position ] = area_count
180- data [area ] = area_array
181-
182- y_keys = [area for area in data_filters if area != "All" ]
195+ # Create arrays for each real research area selected
196+ for area in real_research_areas :
197+ # Create array with value at correct position, zeros elsewhere
198+ area_array = np .zeros (len (data ["x_value" ]))
199+ if area in data ["x_value" ]:
200+ position = data ["x_value" ].index (area )
201+ area_count = len (df [df [filter_by ] == area ])
202+ area_array [position ] = area_count
203+ data [area ] = area_array
204+
205+ # Handle "Cum. Sum" pseudo-category if selected
206+ if "Cum. Sum" in pseudo_categories :
207+ # Add cumulative sum data (this will be the total across all areas)
208+ data ["Cum. Sum" ] = data_all .get ("All" , [])
209+
210+ # Set y_keys to include both real areas and pseudo-categories
211+ y_keys = real_research_areas + [cat for cat in pseudo_categories if cat != "All" ]
183212 else :
184213 # Normal case: use prepare_data_research_field for other questions
185214 data , y_keys = prepare_data_research_field (df , q_index )
186215
187- # Add "All" data if it's selected in the filters
188- if "All" in data_filters :
216+ # Add "All" data if it's selected in the pseudo-categories
217+ if "All" in pseudo_categories :
189218 all_data = data_all .get ("All" , [])
190219
191220 # Special handling for research area question
0 commit comments