Skip to content

Commit 4196229

Browse files
committed
First status to include multiple choice still buggy
1 parent 1456286 commit 4196229

16 files changed

Lines changed: 1700 additions & 360 deletions

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
# custom
2+
*.csv
3+
14
# Byte-compiled / optimized / DLL files
25
__pycache__/
36
*.py[cod]

dashboard/analysis.py

Lines changed: 144 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@ def calculate_crosstab(df: pd.DataFrame, data_key1: str, data_key2: str, id_vars
3838
return df_crosstab
3939

4040

41-
def filter_dataframe(df: pd.DataFrame, include: list=None, exclude: List[Tuple[str, list]]=None, exclude_nan=True, as_type="category") -> pd.DataFrame:
41+
def filter_dataframe(df: pd.DataFrame, include: list=None, exclude: List[Tuple[str, list]]=None,
                     exclude_nan=True, exclude_anonymized=True, as_type="category") -> pd.DataFrame:
    """
    Filter pandas dataframe

    The steps are applied in this order:

    1. If ``include`` is given, keep only these columns, drop the rows in
       which all of them are missing and cast the result to ``as_type``.
    2. For every ``(column, values)`` pair in ``exclude`` drop the rows whose
       entry in ``column`` is one of ``values``.
    3. If ``exclude_nan`` is set, drop every row that still has a missing
       value in any of the remaining columns.
    4. If ``exclude_anonymized`` is set, replace the string ``"Anonymized"``
       by the empty string. The affected rows are kept, only the value changes.

    :param df: data frame to filter, it is not modified
    :type df: pd.DataFrame
    :param include: column names to keep, ``None`` keeps all columns
    :param exclude: ``(column, values)`` pairs of values to drop,
        ``None`` (the default) excludes nothing
    :param exclude_nan: drop rows with a missing value in any column
    :param exclude_anonymized: blank out ``"Anonymized"`` entries
    :param as_type: dtype the included columns are cast to
    :return: the filtered data frame
    :rtype: pd.DataFrame

    example:
    ```
    df = filter_dataframe(surveydata, include=["careerLevel", "docStructured", "researchArea"], exclude=[("careerLevel", to_exclude)])
    ```
    """
    if include is not None:
        df = df[include].dropna(how="all", subset=include).astype(as_type)

    # ``exclude`` defaults to None, which has to behave like an empty list
    for key, val in exclude or []:
        df = df.loc[~df[key].isin(val)]

    if exclude_nan:
        # same result as filtering column by column on ``~isna()``
        df = df.dropna()

    if exclude_anonymized:
        df = df.replace(to_replace="Anonymized", value="")

    return df
6369

64-
def get_all_values(df: pd.DataFrame, key: str) -> dict:
70+
def get_all_values(df: pd.DataFrame, keylist: List[str], display_dict=None) -> dict:
    """
    Count all values of a given key from a key list in a data frame and
    return these values in a dictionary sorted.

    With a single key the values of that column are counted and sorted by
    value. With several keys (multiple choice questions, one column per
    option) every column becomes one x tick: ``True`` entries are counted
    under the display name of the column, ``False`` and missing entries are
    ignored and any other entry (e.g. the free text of an "other" option) is
    counted under its own text. Labels are lower-cased and ``' \\n'`` breaks
    are removed, so that differently written variants end up in one bin.

    :param df: data frame with the survey data, it is not modified
    :type df: pd.DataFrame
    :param keylist: column names to count, must not be empty
    :type keylist: List[str]
    :param display_dict: optional mapping from column name to display name,
        columns without an entry are shown under their own name
    :return: ``{'All': counts, <key>: labels}`` where ``<key>`` is the single
        key or, for several keys, the last entry of ``keylist``
    :rtype: dict
    :raises ValueError: if ``keylist`` is empty
    """
    if not keylist:
        raise ValueError("keylist must contain at least one column name")

    if len(keylist) == 1:
        key = keylist[0]
        all_areas = df[key].value_counts().sort_index()
        return {'All': all_areas.values, key: list(all_areas.keys())}

    # multiple keys: now the keys become the xticks
    combined = {}
    for key in keylist:
        label = _all_values_label(key, display_dict)
        column_counts = _all_values_tally(df[key], label)
        if not column_counts:
            # keep an option nobody selected visible, without overwriting
            # a count that an earlier column stored under the same label
            combined.setdefault(label, 0)
        for name, count in column_counts:
            combined[name] = combined.get(name, 0) + count

    return {'All': list(combined.values()), keylist[-1]: list(combined.keys())}


def _all_values_label(key, display_dict):
    """Return the normalised x tick label of the column ``key``."""
    text = key if display_dict is None else display_dict.get(key, key)
    return str(text).replace(' \n', '').lower()


def _all_values_tally(column, label):
    """Count the entries of one option column as ``(label, count)`` pairs, most frequent first."""
    tally = {}
    for value in column.dropna():
        if pd.api.types.is_bool(value):
            if not value:
                continue  # an option that was not selected does not count
            name = label
        else:
            # free text: mixed upper/lower case and optional line breaks
            name = str(value).replace(' \n', '').lower()
        tally[name] = tally.get(name, 0) + 1
    return sorted(tally.items(), key=lambda item: item[1], reverse=True)
73113

114+
def prepare_data_research_field(df: pd.DataFrame, keylist:List[str], key2:str='researchArea', sort_as=None, display_dict= None):# -> dict, list:
    """Creates a dict dictionary with data in the form needed by the plotting functions

    We prepare several outputs, i.e y_keys because they can have different length and one should be able to create a
    ColumnDataSource by ColumnDataSource(data=data)

    With a single key the values of that column are counted in total
    ('Cum. Sum') and per value of ``key2``. With several keys (multiple choice
    questions, one column per option) every column becomes one x value:
    ``True`` entries are counted under the display name of the column,
    ``False`` and missing entries are ignored and any other entry is counted
    under its own text. Labels are lower-cased and ``' \\n'`` breaks are
    removed. In both cases every list in ``data`` has one entry per x value.

    :param df: data frame with the survey data, it is not modified
    :type df: pd.DataFrame
    :param keylist: column names to count
    :type keylist: List[str]
    :param key2: column by which the counts are split
    :type key2: str
    :param sort_as: currently unused
    :param display_dict: optional mapping from column name to display name,
        only used if several keys are given
    :return: the data dictionary and the list of its y keys, i.e.
        'Cum. Sum' followed by the values found in ``key2``

    example:
    prepare_data_research_field(df, keylist=['careerLevel'])
    {'Cum. Sum': array([  0,   0, 130, 128, 148, 272,   0]),
    'careerLevel': ['Director (of the institute)',
    'Other',
    'PhD student',
    'Postdoc',
    'Principal Investigator',
    'Research associate',
    'Undergraduate / Masters student'],
    'x_value': [... same entries as 'careerLevel' ...],
    'researchArea': ['Engineering Science',
    'Physics',
    'Life Science',
    'Earth Science',
    'Chemistry',
    'Other',
    'Psychology',
    'Mathematics'],
    'Engineering Science': array([  0,   0,  47,  30,  52, 134,   0]),
    'Physics': array([ 0,  0, 33, 38, 39, 57,  0]),
    'Life Science': array([ 0,  0, 28, 29, 27, 33,  0]),
    'Earth Science': array([ 0,  0,  8, 11, 18, 32,  0]),
    'Chemistry': array([ 0,  0,  9, 12,  6,  4,  0]),
    'Other': array([0, 0, 1, 2, 3, 6, 0]),
    'Psychology': array([0, 0, 3, 2, 3, 2, 0]),
    'Mathematics': array([0, 0, 1, 4, 0, 4, 0])}

    NOTE(review): the 'researchArea' entry of this example is not produced by
    the code below, the research areas are returned in y_keys instead.
    """
    research_areas = list(df[key2].value_counts().keys())
    y_keys = ['Cum. Sum'] + research_areas

    # Multiple columns will be combined. A single column will be treated differently
    if len(keylist) == 1:
        key = keylist[0]
        all_areas = df[key].value_counts().sort_index()
        data = {'Cum. Sum': all_areas.values, key: list(all_areas.keys()), 'x_value': list(all_areas.keys())}
        for area in research_areas:
            area_counts = df.loc[df[key2] == area, key].value_counts()
            # align with the x values, a value missing in this area counts as 0
            data[area] = area_counts.reindex(all_areas.index, fill_value=0).values
        return data, y_keys

    combined = {}
    per_area = {area: {} for area in research_areas}
    area_masks = {area: df[key2] == area for area in research_areas}
    for key in keylist:
        label = _research_field_label(key, display_dict)
        column = df[key]
        column_counts = _research_field_tally(column, label)
        if not column_counts:
            # keep an option nobody selected visible as an x value
            combined.setdefault(label, 0)
        for name, count in column_counts:
            combined[name] = combined.get(name, 0) + count
        for area in research_areas:
            area_tally = per_area[area]
            for name, count in _research_field_tally(column[area_masks[area]], label):
                area_tally[name] = area_tally.get(name, 0) + count

    x_values = list(combined.keys())
    # one count per x value and area, so that all columns have the same length
    data = {area: [per_area[area].get(name, 0) for name in x_values] for area in research_areas}
    data['Cum. Sum'] = list(combined.values())
    data['x_value'] = x_values

    return data, y_keys


def _research_field_label(key, display_dict):
    """Return the normalised x value label of the column ``key``."""
    text = key if display_dict is None else display_dict.get(key, key)
    return str(text).replace(' \n', '').lower()


def _research_field_tally(column, label):
    """Count the entries of one option column as ``(label, count)`` pairs, most frequent first."""
    tally = {}
    for value in column.dropna():
        if pd.api.types.is_bool(value):
            if not value:
                continue  # an option that was not selected does not count
            name = label
        else:
            # free text: mixed upper/lower case and optional line breaks
            name = str(value).replace(' \n', '').lower()
        tally[name] = tally.get(name, 0) + 1
    return sorted(tally.items(), key=lambda item: item[1], reverse=True)
207+
208+
'''
74209
def prepare_data_research_field(df: pd.DataFrame, key:str, key2:str='researchArea', sort_as=None):# -> dict, list:
75210
"""Creates a dict dictionary with data in the form needed by the plotting functions
76211
@@ -120,7 +255,7 @@ def prepare_data_research_field(df: pd.DataFrame, key:str, key2:str='researchAre
120255
data[area] = area_counts.values
121256
122257
return data, y_keys
123-
258+
'''
124259
'''
125260
def prepare_data_research_field(df: pd.DataFrame, key:str):
126261
"""AI is creating summary for prepare_data_researchfield

dashboard/data/display_specifications/barchart_allowed.txt

Lines changed: 0 additions & 16 deletions
This file was deleted.

dashboard/data/display_specifications/corr_chart_allowed.txt

Lines changed: 0 additions & 16 deletions
This file was deleted.

dashboard/data/display_specifications/filters.txt

Lines changed: 0 additions & 9 deletions
This file was deleted.

dashboard/data/display_specifications/filters_methods.txt

Lines changed: 0 additions & 6 deletions
This file was deleted.

0 commit comments

Comments
 (0)