@@ -60,8 +60,17 @@ def filter_dataframe(df: pd.DataFrame, include: list=None, exclude: List[Tuple[s
6060 df = df .loc [~ df [key ].isna ()]
6161 return df
6262
def get_all_values(df: pd.DataFrame, key: str) -> dict:
    """
    Tally how often each distinct value occurs in column ``key``.

    The tallies are ordered by the distinct values themselves (index order),
    not by frequency. The result maps 'All' to the array of counts and
    ``key`` to the list of distinct values, both in that same order.
    """
    counts = df[key].value_counts().sort_index()
    labels = list(counts.index)
    return {'All': counts.values, key: labels}
6372
64- def prepare_data_research_field (df : pd .DataFrame , key :str , key2 :str = 'researchArea' ):# -> dict, list:
73+ def prepare_data_research_field (df : pd .DataFrame , key :str , key2 :str = 'researchArea' , sort_as = None ):# -> dict, list:
6574 """Creates a dict dictionary with data in the form needed by the plotting functions
6675
6776 We prepare several outputs, i.e y_keys because they can have different length and one should be able to create a
@@ -73,7 +82,7 @@ def prepare_data_research_field(df: pd.DataFrame, key:str, key2:str='researchAre
7382
7483 example:
7584 prepare_data_research_field(df, key=careerLevel)
76- {'All ': array([ 0, 0, 130, 128, 148, 272, 0]),
85+ {'Cum. Sum ': array([ 0, 0, 130, 128, 148, 272, 0]),
7786 'careerLevel': ['Director (of the institute)',
7887 'Other',
7988 'PhD student',
@@ -132,6 +141,12 @@ def prepare_data_research_field(df: pd.DataFrame, key:str):
132141 return data
133142'''
134143
135- def percentage_to_area (data , scale_m = 1.0 ):
def percentage_to_area(data: List[float], scale_m: float = 1.0) -> List[float]:
    """
    Convert numbers to circle diameters so that circle area tracks the value.

    Each value is read as the area ``val * scale_m`` of a circle, and the
    number returned for it is ``2 * sqrt(area / pi)``, i.e. the diameter
    (twice the radius) of that circle.
    Useful for circle plots where the area should be proportional to the value.

    ``math.sqrt`` raises ValueError if ``val * scale_m`` is negative.
    """
    # r = sqrt(area / pi); the factor 2 turns the radius into a diameter.
    diameters = [2 * math.sqrt(val * scale_m / math.pi) for val in data]
    return diameters
0 commit comments