Materials-Data-Science-and-Informatics
diff --git a/Citation.cff b/Citation.cff
Lines changed: 1 addition & 1 deletion
diff --git a/dashboard/analysis.py b/dashboard/analysis.py
Lines changed: 35 additions & 26 deletions
diff --git a/dashboard/data/display_specifications/hcs_clean_dictionaries.py b/dashboard/data/display_specifications/hcs_clean_dictionaries.py
Lines changed: 87 additions & 3 deletions
diff --git a/dashboard/data/display_specifications/hmc_colordicts.py b/dashboard/data/display_specifications/hmc_colordicts.py
Lines changed: 2 additions & 0 deletions
diff --git a/dashboard/data/display_specifications/hmc_custom_layout.py b/dashboard/data/display_specifications/hmc_custom_layout.py
Lines changed: 45 additions & 0 deletions
diff --git a/dashboard/data/download_data.py b/dashboard/data/download_data.py
Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@ authors:
     email: j.broeder@fz-juelich.de
     affiliation: Forschungszentrum Jülich GmbH (FZJ)
   - family-names: Gerlich
-    given-names: Silke
+    given-names: Silke Christine
     orcid: 'https://orcid.org/0000-0003-3043-5657'
     email: s.gerlich@fz-juelich.de
     affiliation: Forschungszentrum Jülich GmbH (FZJ)
 
@@ -18,6 +18,7 @@
 
 def calculate_crosstab(df: pd.DataFrame, data_key1: str, data_key2: str, id_vars: str=None, astype: str="int") -> pd.DataFrame:
     """Calculate the cross table for two keys in a given pandas data frame"""
+
     if id_vars is None:
         id_vars = data_key1
 
@@ -41,12 +42,15 @@ def calculate_crosstab(df: pd.DataFrame, data_key1: str, data_key2: str, id_vars
 def filter_dataframe(df: pd.DataFrame, include: list=None, exclude: List[Tuple[str, list]]=None, 
                 exclude_nan=True, exclude_anonymized=True, as_type="category") -> pd.DataFrame:
     """
-    Filter pandas dataframe
+    Filter a pandas dataframe
 
     example:
     ```
     to_exclude = ['Other', 'Undergraduate / Masters student', 'Director (of the institute)']
     df = filter_dataframe(surveydata, include=["careerLevel", "docStructured", "researchArea"], exclude=[("careerLevel", to_exclude)])
+
+    Returns a DataFrame with the columns ["careerLevel", "docStructured", "researchArea"],
+    from which every row whose "careerLevel" value is listed in to_exclude has been removed.
     ```
     """
 
@@ -93,24 +97,26 @@ def get_all_values(df: pd.DataFrame, keylist: List[str], display_dict=None) -> d
             if a.empty:
                 combined[xtick] = 0
             else:
+                # greedy, there is probably a pandas way to do this...
+                # there is a problem if df is empty, i.e temp.value_counts() True 0
                 for i, ke in enumerate(a.keys()):
                     # because other can contain all... others..
-                    ke = ke.lower() # sometimes there are mixed upper and lower case keys...
-                    ke = ke.replace(' \n', '') # some are with and without breaks
+                    #ke = ke.lower() # sometimes there are mixed upper and lower case keys...
+                    #ke = ke.replace(' \n', '') # some are with and without breaks
                     temp_val = combined.get(ke, 0)
                     temp_val = temp_val + a.values[i]
                     combined[ke] = temp_val
-            # greedy, there is probably a pandas way to do this...
-            # there is a problem if df is empty, i.e temp.value_counts() True 0
-            for i, ke in enumerate(a.keys()):
-                ke = ke.lower() # sometimes there are mixed upper and lower case keys...
-                ke = ke.replace(' \n', '') # some are with and without breaks
-                temp_val = combined.get(ke, 0)
-                temp_val = temp_val + a.values[i]
-                combined[ke] = temp_val
+
+            #for i, ke in enumerate(a.keys()):
+            #    ke = ke.lower() # sometimes there are mixed upper and lower case keys...
+            #    ke = ke.replace(' \n', '') # some are with and without breaks
+            #    temp_val = combined.get(ke, 0)
+            #    temp_val = temp_val + a.values[i]
+            #    combined[ke] = temp_val
         data = {'All' : list(combined.values()), key:list(combined.keys())}
     return data
 
+
 def prepare_data_research_field(df: pd.DataFrame, keylist:List[str], key2:str='researchArea', sort_as=None, display_dict= None):# -> dict, list:
     """Creates a dict dictionary with data in the form needed by the plotting functions
     
@@ -161,7 +167,8 @@ def prepare_data_research_field(df: pd.DataFrame, keylist:List[str], key2:str='r
             area_counts = df[df[key2] == area][key].value_counts()
             area_counts = area_counts.sort_index()
             data[area] = area_counts.values
-    else:        
+    else:
+        # Cum. Sum. is buggy?
         combined = {}
         data = {}
         for key in keylist:
@@ -176,28 +183,30 @@ def prepare_data_research_field(df: pd.DataFrame, keylist:List[str], key2:str='r
             a = temp.value_counts()
             # greedy, there is probably a pandas way to do this...
             # there is a problem if df is empty, i.e temp.value_counts() True 0
-            for i, ke in enumerate(a.keys()):
-                # because other can contain all... others..
-                ke = ke.lower() # sometimes there are mixed upper and lower case keys...
-                ke = ke.replace(' \n', '') # some are with and without breaks
-                temp_val = combined.get(ke, 0)
-                temp_val = temp_val + a.values[i]
-                combined[ke] = temp_val
-        
+            if a.empty:
+                combined[xtick] = 0
+            else:  
+                for i, ke in enumerate(a.keys()):
+                    # because other can contain all... others..
+                    #ke = ke.lower() # sometimes there are mixed upper and lower case keys...
+                    #ke = ke.replace(' \n', '') # some are with and without breaks
+                    temp_val = combined.get(ke, 0)
+                    temp_val = temp_val + a.values[i]
+                    combined[ke] = temp_val
+            
+            # now fill research area specifics
             for area in research_areas:
                 area_counts = df[df[key2] == area][key]
-                temp = data.get(area, [])
-
                 area_counts.replace(to_replace=True, value=xtick, inplace=True)
                 area_counts.replace(to_replace=False, value=None, inplace=True)
-                area_counts.value_counts()
+                area_counts = area_counts.value_counts()
                 area_counts = area_counts.sort_index()
-                
-                print(area_counts)
+                temp = data.get(area, [])
+                #print(area_counts)
                 if area_counts.empty:
                     temp.append(0)
                 else:
-                    temp.append(list(area_counts.values))
+                    temp.append(int(area_counts.values[0]))
                 data[area] = temp
 
         data['Cum. Sum'] = list(combined.values())
 
@@ -10,8 +10,6 @@
 ###############################################################################
 """
 A collection of dictionaries/maps to influence the display of certain categorical data
-
-@author: s.gerlich
 """
 
 ##########################################
@@ -2194,7 +2192,7 @@
 
 
 # This is what will be displayed in selection widgets and titles
-HCSquestions = {
+HCSquestions_long = {
                 "EN" : {        
                         "PERBG1/_":"Which Helmholtz center do you typically work in?",
                         "PERBG2/_":"Please select the Helmholtz research field you associate yourself with.",
@@ -2278,6 +2276,91 @@
                         "SERVC3":"Sie haben es fast geschafft! Gerne können Sie Fragen, Wünsche oder Anregungen im folgenden Freitextfeld formulieren:"
                         }
                     }
+# Shortened question texts, used as labels in selection widgets and as titles
+HCSquestions = { # shortened
+                "EN" : {        
+                        "PERBG1/_":"Helmholtz center participants typically work in",
+                        "PERBG2/_":"Helmholtz research field participants associate with.",
+                        "PERBG3/_":"Principle research area of participants.",
+                        "PERBG4/_":"Working years in research.",
+                        "PERBG6/_":"Career level of participants.",
+                        "PERBG7/_":"Do you have an ORCID ID?",
+                        "PERBG8/_":"Familiarity with the FAIR data guidelines",
+                        "RSDP1":"Origin of research data.",
+                        "RSDP1b/1":"Percentage of data sets recorded at large scale facilities.",
+                        "RSDP1c":"Large scale facilities used.",
+                        "RSDP2":"Research data generation methods used (selected).",
+                        "RSDP2b":"Research data generation methods used (specified).",
+                        "RSDP3":"Data formats used in research projects.",
+                        "RSDP7/_":"Amount of data a typical publication is based on.",
+                        "RSDP4/_":"Average time from planning to completion for projects. (months)",
+                        "RSDP8/_":"Experiments take ___ time than on average in my domain.",
+                        "RSDP11/_":"Data analyses take ___ time than on average in my domain.",
+                        "RSDP10/_":"Unpublished raw data kept in long-term storage (>10 years).",
+                        "DTPUB6/1":"Percentage of data sets made publicly available.",
+                        "DTPUB1b":"How data was publised.",
+                        "DTPUB5":"Repositories data is published in.",
+                        "DTPUB3":"Motivations to publish data.",
+                        "DTPUB4a":"Obstacles for publishing research data.",
+                        "DTPUB4b":"Discouragements for publishing data so far.",
+                        "RDMPR1":"Data storage for finished projects.",
+                        "RDMPR3":"Where are data generation and processing are documented.",
+                        "RDMPR7":"Information (metadata) typically used to describe research data.",
+                        "RDMPR8":"Information (metadata) typically documented in digital way.",
+                        "RDMPR9":"Information (metadata) typically gathered in automated way.",
+                        "RDMPR4/_":"Reseach data documented in a structued.",
+                        "RDMPR5/_":"Usage of internationally templates, schemas or standards.",
+                        "DTPUB7":"Metadata published along with research data.",
+                        "RDMPR6":"International standards in use.",
+                        "RDMPR10":"Three most important software applications used in research.",
+                        "RDMPR12":"Motivations to documented work in a structured way.",
+                        "RDMPR11":"Difficulties in collecting metadata as part of work.",
+                        "SERVC1":" Support or services needed by research data management area.",
+                        "SERVC2":"Interest in certain service formats.",
+                        "SERVC3":"Free feedback text field:"
+                        },
+                "DE" : {
+                        "PERBG1/_":"In welchem Helmholtz-Zentrum sind Sie in erster Linie tätig?",
+                        "PERBG2/_":"Welchem Helmholtz-Forschungsbereich ordnen Sie sich am ehesten zu?",
+                        "PERBG3/_":"Welcher Forschungsdisziplin ordnen Sie sich am ehesten zu?",
+                        "PERBG4/_":"Wie viele Jahre sind Sie bereits in der Forschung tätig?",
+                        "PERBG6/_":"Was ist Ihre aktuelle Position?",
+                        "PERBG7/_":"Haben Sie eine ORCID iD?",
+                        "PERBG8/_":"Wie vertraut sind Sie mit den FAIR-Data Leitlinien?",
+                        "RSDP1":"Bitte charakterisieren Sie den Ursprung Ihrer Forschungsdaten.",
+                        "RSDP1b/1":"Welcher Anteil Ihrer Datensätze wurde an Großforschungsanlagen (z.B. LHC, PETRA III, KATRIN ELBE, BESSY II) erfasst? (Angabe in Prozent)",
+                        "RSDP1c":"Bitte nennen Sie die genutzte Großforschungseinrichtung:",
+                        "RSDP2":"Mit welchen Methoden erheben Sie Ihre Forschungsdaten?",
+                        "RSDP2b":"Bitte spezifizieren Sie die Methoden, mit denen Sie Ihre Forschungsdaten erheben.",
+                        "RSDP3":"In welchen Datenformaten liegen die Daten vor, die Sie in Ihrem aktuellen Forschungsprojekt generieren bzw. nutzen?",
+                        "RSDP7/_":"Bitte schätzen Sie, auf welcher Datenmenge eine typische Veröffentlichung von Ihnen beruht.",
+                        "RSDP4/_":"Wie viel Zeit vergeht durchschnittlich von der Planung bis zum Abschluss der Datenaufnahme für Ihre Forschungsprojekte? (in Monaten)",
+                        "RSDP8/_":"Meine Experimente nehmen ___ Zeit in Anspruch als eine durchnittliche Untersuchung in meinem Forschungsbereich.",
+                        "RSDP11/_":"Meine Datenanalysen nehmen ___ Zeit in Anspruch als eine durchschnittliche Untersuchung in meinem Forschungsbereich.",
+                        "RSDP10/_":"Speichern Sie Rohdaten, die nicht publiziert werden, langfristig (10 Jahre und länger)?",
+                        "DTPUB6/1":"Bitte schätzen Sie, welchen relativen Anteil Ihrer Datensätze Sie publizieren. (Angabe in Prozent)",
+                        "DTPUB1b":"Wie haben Sie Ihre Daten publiziert?",
+                        "DTPUB5":"In welchen Repositorien haben Sie Ihre Daten veröffentlicht?",
+                        "DTPUB3":"Was motivierte Sie dazu, Ihre Forschungsdaten zu veröffentlichen? (Bitte wählen Sie bis zu 3 Antworten)",
+                        "DTPUB4a":"Auf welche Hindernisse sind Sie bei der Veröffentlichung Ihrer Forschungsdaten gestoßen?",
+                        "DTPUB4b":"Welche Bedenken oder Hindernisse haben Sie bisher davon abgehalten, Ihre Forschungsdaten zu veröffentlichen?",
+                        "RDMPR1":"Wo werden Ihre Forschungsdaten nach Abschluss eine Projekts hauptsächlich gespeichert?",
+                        "RDMPR3":"Wo dokumentieren Sie in Ihrem aktuellen Projekt die Arbeitsschritte, mit denen Ihre Daten erzeugt und verarbeitet werden?",
+                        "RDMPR7":"Mit welchen Informationen (Metadaten) beschreiben Sie normalerweise Ihre Forschungsdaten?",
+                        "RDMPR8/_":"Welche Informationen (Metadaten) davon erfassen Sie in der Regel digital?",
+                        "RDMPR9":"Welche dieser Informationen (Metadaten) erfassen Sie in der Regel automatisiert?",
+                        "RDMPR4/_":"Dokumentieren Sie Ihre Forschungsdaten auf strukturierte Weise? (z.B. mittels Formularen, Vorlagen oder Schemata)",
+                        "RDMPR5/_":"Verwenden Sie hierzu international genutzte Formulare, Schemata oder Standards?",
+                        "DTPUB7":"Welche dieser Metadaten publizieren Sie zusammen mit Ihren Forschungsdaten?",
+                        "RDMPR6":"Welche internationalen Standards nutzen Sie?",
+                        "RDMPR10":"Bitte nennen Sie die drei wichtigsten Softwareanwendungen, die Sie für Ihre Forschung verwenden.",
+                        "RDMPR12":"Was motiviert Sie dazu, Ihre Arbeitsschritte auf strukturierte Weise zu dokumentieren?",
+                        "RDMPR11":"Auf welche Hindernisse oder Schwierigkeiten sind Sie bei der Erfassung von Metadaten im Rahmen Ihrer Arbeit gestoßen?",
+                        "SERVC1":"In welchen Bereichen des Forschungsdatenmanagements haben Sie Bedarf an unterstützenden Angeboten?",
+                        "SERVC2":"Bitte bewerten Sie Ihr Interesse an den folgenden Service-Formaten.",
+                        "SERVC3":"Sie haben es fast geschafft! Gerne können Sie Fragen, Wünsche oder Anregungen im folgenden Freitextfeld formulieren:"
+                        }
+                    }
 
 
 # Dashboard specific white lists
@@ -2359,6 +2442,7 @@
 "Engineering Science",
 "Life Science",
 "Mathematics",
+"Other",
 "Physics",
 "Psychology"]
 
 
@@ -79,3 +79,5 @@ def make_Ramp( ramp_colors ):
 # continuous color scale based on Hub Info color palette
 hubInfoRamp = make_Ramp(hubInfoPalette)
 #cmr.view_cmap(hubInfoRamp)
+
+
@@ -0,0 +1,45 @@
+hmc_custom_css_accordion ='''
+.bk.card {
+  border: 1px solid rgba(0,0,0,.125);
+  border-radius: 0.25rem;
+}
+.bk.accordion {
+  border: 1px solid rgba(0,0,0,.125);
+}
+.bk.card-header {
+  align-items: center;
+  background-color: rgba(0, 0, 0, 0.03);
+  border-radius: 0.25rem;
+  display: inline-flex;
+  justify-content: start;
+  width: 100%;
+}
+.bk.accordion-header {
+  align-items: center;
+  background-color: rgba(0, 0, 0, 0.03);
+  border-radius: 0;
+  display: flex;
+  justify-content: start;
+  width: 100%;
+}
+.bk.card-button {
+  background-color: transparent;
+  margin-left: 0.5em;
+}
+.bk.card-header-row {
+  position: relative !important;
+}
+.bk.card-title {
+  align-items: left;
+  font-size: 1.4em;
+  font-weight: bold;
+  overflow-wrap: break-word;
+}
+.bk.card-header-row > .bk {
+  overflow-wrap: break-word;
+  text-align: left;
+}
+'''
+#rgba(0, 0, 0, 0.03);
+#"#005AA0" :  rgba(0, 90, 160, 0.53);
+#  background-color: rgba(0, 90, 160, 0.53);
@@ -15,7 +15,7 @@
 #url="https://access.gesis.org/sharing/2433/3778"
 
 def download_data(url="https://access.gesis.org/sharing/2433/3778", 
-    destination="dashboard/data/hmc_survey_2021_data_cleaned-csv"):
+    destination="dashboard/data/hmc_survey_2021_data_cleaned.csv"):
     """
     This function downloads the dataset for a given DOI
     """