Contingency/initialiser/initialiser.py at ab4076ffae576740ed724e4ded2443bd4bcd4c54 · Project-Sothea/Contingency · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
import os

import sys

# Add the parent directory of 'columns' to sys.path
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../")))

import pandas as pd
from openpyxl.formatting.rule import FormulaRule
from openpyxl.reader.excel import load_workbook
from openpyxl.styles import PatternFill, Side, Border, Alignment

import columns
from columns import DataType
from columns import Types

"""
Initialise an empty Excel workbook with data validation rules, formatting, and dropdowns.
Generates an empty DataSheet.xlsx with column headers based on types.csv.
Places generated DataSheet.xlsx in the same directory as the script.
"""
class Initialiser:
    def __init__(self, patientdata_file_path, types_filepath):
        """
        Initialize the Validator object with a CSV file path.

        Args:
            csv_folder_path (str): The path to the CSV file to be validated.
        """
        self.csv_folder_path: str = patientdata_file_path
        self.types_filepath: str = types_filepath
        self.df: pd.DataFrame = None
        self.wb = None
        self.ws = None
        self.wb_name = "DataSheet.xlsx"
        self.ws_name = "Sheet1"
        self.types: columns.Types

    def initialise(self):
        """
        Do preliminary initialisation
        """
        try:
            self.types = Types(self.types_filepath)
            print(f"Types '{self.types_filepath}' read successfully!")
        except Exception as e:
            print(f"Error reading types file '{self.types_filepath}': {e}")

    def readCSV(self):
        """
        - Generates an empty DataFrame with column headers based on types.csv
        - Creates an empty Excel workbook with the correct column structure
        - Initializes the Excel workbook and worksheet
        """
        try:
            # Generate a mapping from csv_name to datasheet_name
            csv_to_datasheet_map = {}
            for category in self.types.categories:
                for field in category.fields:
                    csv_to_datasheet_map[field.csv_name] = field.datasheet_name

            # Create an empty DataFrame with all datasheet column names
            datasheet_columns = [field.datasheet_name for category in self.types.categories for field in category.fields]
            self.df = pd.DataFrame(columns=datasheet_columns)

            print("Empty DataFrame created with column headers based on types.csv")

            # Save empty DataFrame to an Excel file
            self.df.to_excel(self.wb_name, sheet_name=self.ws_name, index=False)

            # Load and store the workbook
            self.wb = load_workbook(self.wb_name)

            if self.ws_name not in self.wb.sheetnames:
                raise ValueError(f"Sheet '{self.ws_name}' does not exist in the workbook.")

            self.ws = self.wb[self.ws_name]
            print(f"Empty Excel workbook '{self.wb_name}' created successfully!")

        except Exception as e:
            print(f"Error creating empty Excel file: {e}")

    def applyValidationRules(self):
        """
        - Apply data validation rules to an Excel workbook.
        - Disable column renaming #TODO
        - Also adds "Example" Row to workbook #TODO
        """
        try:
            # Add the dv rules listed in DataType enums to the worksheet
            for dt in DataType:
                if dt.validation != None:
                    self.ws.add_data_validation(dt.validation)

            # for each category in types, for each field in the category, apply the validation rule to its datasheet range
            for category in self.types.categories:
                for field in category.fields:
                    if field.datatype.validation != None:
                        field.datatype.validation.add(field.datasheet_range)

            self.wb.save(self.wb_name)
            print("Validation rules applied and saved successfully!")

        except Exception as e:
            print(f"Error applying validation rules: {e}")

    def applyFormatting(self):
        """
        Apply formatting rules to an Excel workbook.
        - Applies a border to first 500 rows
        - Colour first row of cells according to their category
        - Pin first row, and first 3 columns of cells
        - Resizes column width to fit the content.
        - First Row: Triples height, centres text
        - Bold required fields #TODO
        """

        # Apply border to all cells
        border_style = Border(
            left=Side(border_style="thin", color="000000"),
            right=Side(border_style="thin", color="000000"),
            top=Side(border_style="thin", color="000000"),
            bottom=Side(border_style="thin", color="000000")
        )

        for row in self.ws.iter_rows(min_row=1, max_row=500, min_col=1, max_col=self.ws.max_column):
            for cell in row:
                cell.border = border_style
        print("Borders applied successfully!")

        # Colours first row of cells according to their category
        for category in self.types.categories:
            for field in category.fields:
                custom_fill = PatternFill(start_color=field.colour, end_color=field.colour, fill_type="solid")
                start_cell, end_cell = field.datasheet_range.split(":")
                # Extract the column letters from start_cell and adjust the row to 1
                column_letter = ''.join([char for char in start_cell if char.isalpha()])
                start_cell_adjusted = f"{column_letter}1"

                # Apply the color to the adjusted start cell (now row 1)
                self.ws[start_cell_adjusted].fill = custom_fill
        print("Cell colours applied successfully!")

        # Pin first row, and first 3 columns of cells
        self.ws.freeze_panes = "D2"  # Freeze everything above row 2 and to the left of column D

        # First Row: Triples height, centres text
        for col in self.ws.columns:
            max_length = 0
            column = col[0].column_letter
            for cell in col:
                try:
                    if len(str(cell.value)) > max_length:
                        max_length = len(cell.value)
                except:
                    pass
            adjusted_width = min(40, max_length + 2)
            self.ws.column_dimensions[column].width = adjusted_width

        self.ws.row_dimensions[1].height = 70
        for cell in self.ws[1]:  # Loop through each cell in the first row
            cell.alignment = Alignment(wrap_text=True, horizontal="center", vertical="center")


        self.wb.save(self.wb_name)
        print("Formatting applied and saved successfully!")

    def applyConditionalFormatting(self):
        """
        Apply conditional formatting to highlight empty required cells in rows where
        any cell in the same category is filled.
        """
        red_fill = PatternFill(start_color="ea9999", end_color="ea9999",
                               fill_type="solid")  # Red fill for required fields

        for category in self.types.categories:  # Iterate through each category
            # Collect column letters for required fields in this category
            required_columns = [
                ''.join(filter(str.isalpha, f.datasheet_range.split(":")[0]))
                for f in category.fields if f.required  # Only include required fields
            ]
            category_columns = [
                ''.join(filter(str.isalpha, f.datasheet_range.split(":")[0]))
                for f in category.fields
            ]

            for row_number in range(2, 301): # start and end rows
                # Mark a cell red only if it is empty
                for col in required_columns:
                    formula = f'=ISBLANK(${col}{row_number})'
                    rule = FormulaRule(formula=[formula], fill=red_fill)
                    self.ws.conditional_formatting.add(f"{col}{row_number}", rule)


        self.wb.save(self.wb_name)
        print("Conditional formatting applied successfully!")

    def rename_columns(self):
        """
        This method is no longer needed since we generate empty sheets directly with datasheet names.
        Kept for backward compatibility but does nothing.
        """
        # No-op: columns are already in datasheet format when creating empty DataFrame
        pass

if __name__ == "__main__":
    # types_file_path is still needed to define the schema
    types_file_path = "../types.csv"
    # patientdata_file_path is no longer used but kept for backward compatibility
    patientdata_file_path = "../cron/backups"  # Not used anymore, kept for compatibility

    # delete the existing DataSheet.xlsx file
    try:
        os.remove("DataSheet.xlsx")
    except:
        pass

    initialiser = Initialiser(patientdata_file_path, types_file_path)
    initialiser.initialise()
    initialiser.readCSV()
    initialiser.applyValidationRules()
    initialiser.applyFormatting()
    initialiser.applyConditionalFormatting()