Skip to content

file_reader_utils

Description: This module contains the FileReaderUtil class, which provides methods to read various types of files. The supported file types include PDF, Word, CSV, and Excel files.

FileReaderUtil

Description

This class contains different methods that can be used to read various types of files, such as Word files, CSV files, and PDF files.

Source code in libs\cafex_core\src\cafex_core\utils\file_reader_utils.py
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
class FileReaderUtil:
    """
    Description:
        |  This class contains different methods that can be used to read various types of files,
        |  such as Word files, CSV files, and PDF files. It also offers helpers to write text,
        |  Excel, Word and CSV files, to extract ZIP/7Z archives and to delete files.
    """

    def __init__(self):
        self.__exceptions_generic = CoreExceptions()
        self.logger = CoreLogger(name=__name__).get_logger()

    def _report_failure(self, error_description: str) -> None:
        """
        Forwards an error description to the shared exception handler.

        Every public method uses the same reporting options, so the call lives here
        instead of being repeated in each ``except`` block. The caller is responsible
        for re-raising the original exception afterwards.

        Args:
            error_description (str): The message passed to ``raise_generic_exception``.

        Returns:
            None
        """
        self.__exceptions_generic.raise_generic_exception(
            message=error_description,
            insert_report=True,
            trim_log=True,
            log_local=True,
            fail_test=False,
        )

    def read_pdf_file(self, filepath: str) -> list:
        """
        Reads a PDF file and returns its content as a list of strings.

        Args:
            filepath (str): The complete path of the PDF file to be read.

        Returns:
            list: One string per page; newlines are replaced by spaces and carriage
            returns are removed.

        Raises:
            ValueError: If ``filepath`` does not end with ``.pdf``.
            Exception: If an error occurs while reading the PDF file.

        Examples:
            >> pdf_content = FileReaderUtil().read_pdf_file("path/to/file.pdf")
        """
        if not filepath.endswith('.pdf'):
            self.logger.error("The provided file is not a PDF.")
            raise ValueError("The provided file is not a PDF.")
        content_list = []
        try:
            with fitz.open(filepath) as doc:
                for page in doc:
                    text = page.get_text("text")
                    # Flatten each page into a single line of text.
                    paragraph = text.replace("\n", " ").replace("\r", "")
                    content_list.append(paragraph)
            self.logger.info("Successfully read PDF file: %s", filepath)
            return content_list
        except Exception as e:
            self._report_failure(f"An error occurred while reading the PDF file: {e}")
            raise

    def read_excel(self, filepath: str, sheet_reference: Union[str, int, None] = None) -> list:
        """
        Reads an Excel file and returns the specified sheet's content.

        Args:
            filepath (str): The complete path of the Excel file to be read.
            sheet_reference (str or int, optional): The name or index of the sheet to read.
                If None, the active sheet will be read.

        Returns:
            list: The content of the specified sheet, one inner list per row.

        Raises:
            ValueError: If the sheet name does not exist or the sheet index is out of range.
            Exception: If an error occurs while reading the Excel file.

        Examples:
            >> values = FileReaderUtil().read_excel("path/to/file.xlsx", "Sheet1")
            >> values = FileReaderUtil().read_excel("path/to/file.xlsx", 0)  # By index
        """
        warnings.filterwarnings("ignore", category=PendingDeprecationWarning)
        warnings.filterwarnings("ignore", category=DeprecationWarning)

        try:
            # data_only=True is passed so the workbook is loaded with stored values.
            workbook = openpyxl.load_workbook(filepath, data_only=True)
            if isinstance(sheet_reference, str):
                if sheet_reference in workbook.sheetnames:
                    sheet = workbook[sheet_reference]
                else:
                    self.logger.error("Sheet name %s does not exist.", sheet_reference)
                    raise ValueError(f"Sheet name '{sheet_reference}' does not exist.")
            elif isinstance(sheet_reference, int):
                if 0 <= sheet_reference < len(workbook.worksheets):
                    sheet = workbook.worksheets[sheet_reference]
                else:
                    self.logger.error("Sheet index %s is out of range.", sheet_reference)
                    raise ValueError(f"Sheet index '{sheet_reference}' is out of range.")
            else:
                sheet = workbook.active
            content_list = [list(row) for row in sheet.iter_rows(values_only=True)]
            self.logger.info("Successfully read Excel file: %s, sheet: %s", filepath, sheet.title)
            return content_list
        except Exception as e:
            self._report_failure(f"An error occurred while reading the Excel file: {e}")
            raise

    def read_word_file(self, filepath: str) -> list:
        """
        Reads a Word file and returns its content as a list of strings.

        Args:
            filepath (str): The complete path of the Word file to be read.

        Returns:
            list: The text of every ``w:t`` element found in ``word/document.xml``.

        Raises:
            Exception: If an error occurs while reading the Word file.

        Examples:
            >> word_content = FileReaderUtil().read_word_file("path/to/file.docx")
        """
        try:
            # The file is opened as a ZIP archive and its main XML part is read directly.
            with zipfile.ZipFile(filepath) as document:
                xml_content = document.read("word/document.xml")
            string_content = xml_content.decode("utf-8", errors="ignore")
            soup = BeautifulSoup(string_content, features="lxml")
            text_elements = soup.find_all("w:t")
            content_list = [text.text for text in text_elements]
            self.logger.info("Successfully read Word file: %s", filepath)
            return content_list
        except Exception as e:
            self._report_failure(f"An error occurred while reading the Word file: {e}")
            raise

    def read_csv_file(self, filepath: str) -> list:
        """
        Reads a CSV file and returns its content as a list of rows.

        Args:
            filepath (str): The complete path of the CSV file to be read.

        Returns:
            list: A list of rows from the CSV file.

        Raises:
            Exception: If an error occurs while reading the CSV file.

        Examples:
            >> csv_content = FileReaderUtil().read_csv_file("path/to/file.csv")
        """
        try:
            with open(filepath, "r", newline='', encoding='utf-8') as csv_file:
                reader = csv.reader(csv_file)
                content_list = list(reader)
            self.logger.info("Successfully read CSV file: %s", filepath)
            return content_list
        except Exception as e:
            self._report_failure(f"An error occurred while reading the CSV file: {e}")
            raise

    def extract_7z_file(self, zip_file_name: str, target_dir: str) -> str:
        """
        Extracts a 7Z file to the specified target directory.

        Args:
            zip_file_name (str): The complete path of the 7Z file to be extracted.
            target_dir (str): The target directory where the files will be extracted.

        Returns:
            str: The path of the first extracted file.

        Raises:
            EnvironmentError: If the current platform is not Windows.
            Exception: If the archive is empty or cannot be extracted.

        Examples:
            >> extracted_file = FileReaderUtil().
            extract_7z_file("path/to/file.7z", "path/to/extract")
        """
        if platform.system().upper() != "WINDOWS":
            error_message = "7Z extraction is only supported on Windows."
            self.logger.error(error_message)
            raise EnvironmentError(error_message)

        try:
            with py7zr.SevenZipFile(zip_file_name, mode="r") as archive:
                archive.extractall(path=target_dir)
                extracted_file_names = archive.getnames()
                if not extracted_file_names:
                    raise ValueError("No files were extracted from the archive.")
                extracted_file_path = os.path.join(target_dir, extracted_file_names[0])
                self.logger.info("Successfully extracted 7Z file: %s to %s",
                                 zip_file_name, target_dir)
                return extracted_file_path
        except Exception as e:
            self._report_failure(f"The provided file is not supported: {e}")
            raise

    def unzip_zip_file(self, source_path: str, destination_path: str) -> None:
        """
        Unzips a ZIP file to the specified destination path.

        Args:
            source_path (str): The complete path of the ZIP file to be unzipped.
            destination_path (str): The destination path where the files will be extracted.
                It is created when it does not exist yet.

        Returns:
            None

        Raises:
            Exception: If an error occurs while extracting the archive.

        Examples:
            >> FileReaderUtil().unzip_zip_file("path/to/file.zip", "path/to/extract")
        """
        try:
            if not os.path.exists(destination_path):
                os.makedirs(destination_path)
            with zipfile.ZipFile(source_path, 'r') as zip_file:
                zip_file.extractall(destination_path)
            self.logger.info("Successfully unzipped ZIP file: %s to %s",
                             source_path, destination_path)
        except Exception as e:
            self._report_failure(f"The provided file is not supported: {e}")
            raise

    def read_txt_file(self, filepath: str) -> str:
        """
        Reads the contents of a text file.

        Args:
            filepath (str): The complete path of the text file to be read.

        Returns:
            str: The contents of the text file.

        Raises:
            Exception: If an error occurs while reading the text file.

        Examples:
            >> content = FileReaderUtil().read_txt_file("path/to/file.txt")
        """
        try:
            with open(filepath, 'r', encoding='utf-8') as txt_file:
                content = txt_file.read()
            self.logger.info("Successfully read text file: %s", filepath)
            return content
        except Exception as e:
            self._report_failure(f"An error occurred while reading the text file: {e}")
            raise

    def write_txt_file(self, filepath: str, content: str) -> None:
        """
        Writes content to a text file.

        Args:
            filepath (str): The complete path of the text file to be written.
            content (str): The content to write to the text file.

        Returns:
            None

        Raises:
            Exception: If an error occurs while writing to the text file.

        Examples:
            >> FileReaderUtil().write_txt_file("path/to/file.txt", "Hello, World!")
        """
        try:
            with open(filepath, 'w', encoding='utf-8') as txt_file:
                txt_file.write(content)
            self.logger.info("Successfully wrote to text file: %s", filepath)
        except Exception as e:
            self._report_failure(f"An error occurred while writing to the text file: {e}")
            raise

    def write_excel_file(self, filepath: str, data: list[dict]) -> None:
        """
        Writes data to an Excel file.

        Args:
            filepath (str): The complete path of the Excel file to be written.
            data (list of dict): The data to write to the Excel file.

        Returns:
            None

        Raises:
            Exception: If an error occurs while writing to the Excel file.

        Examples:
            >> data = [{'Name': 'John', 'Age': 30}, {'Name': 'Jane', 'Age': 25}]
            >> FileReaderUtil().write_excel_file("path/to/file.xlsx", data)
        """
        try:
            df = pd.DataFrame(data)
            # index=False keeps the DataFrame index out of the written sheet.
            df.to_excel(filepath, index=False)
            self.logger.info("Successfully wrote to Excel file: %s", filepath)
        except Exception as e:
            self._report_failure(f"An error occurred while writing to the Excel file: {e}")
            raise

    def write_word_file(self, filepath: str, content: list[str]) -> None:
        """
        Writes content to a Word file.

        Args:
            filepath (str): The complete path of the Word file to be written.
            content (list of str): The content to write to the Word file, where each
            string is a paragraph.

        Returns:
            None

        Raises:
            Exception: If an error occurs while writing to the Word file.

        Examples:
            >> content = ["First paragraph.", "Second paragraph."]
            >> FileReaderUtil().write_word_file("path/to/file.docx", content)
        """
        try:
            doc = Document()
            for paragraph in content:
                doc.add_paragraph(paragraph)
            doc.save(filepath)
            self.logger.info("Successfully wrote to Word file: %s", filepath)
        except Exception as e:
            self._report_failure(f"An error occurred while writing to the Word file: {e}")
            raise

    def write_csv_file(self, filepath: str, data: list[list]) -> None:
        """
        Writes data to a CSV file.

        Args:
            filepath (str): The complete path of the CSV file to be written.
            data (list of list): The data to write to the CSV file, where each inner list
            represents a row.

        Returns:
            None

        Raises:
            Exception: If an error occurs while writing to the CSV file.

        Examples:
            >> data = [['Name', 'Age'], ['John', 30], ['Jane', 25]]
            >> FileReaderUtil().write_csv_file("path/to/file.csv", data)
        """
        try:
            with open(filepath, 'w', newline='', encoding='utf-8') as csv_file:
                writer = csv.writer(csv_file)
                writer.writerows(data)
            self.logger.info("Successfully wrote to CSV file: %s", filepath)
        except Exception as e:
            self._report_failure(f"An error occurred while writing to the CSV file: {e}")
            raise

    def delete_file(self, filepath: str) -> None:
        """
        Deletes a file from the filesystem.

        Args:
            filepath (str): The complete path of the file to be deleted.

        Returns:
            None

        Raises:
            FileNotFoundError: If the file does not exist.
            Exception: If an error occurs while deleting the file.

        Examples:
            >> FileReaderUtil().delete_file("path/to/file.txt")
        """
        try:
            # Only the missing-file case is an error; a successful removal must not
            # fall through into the "does not exist" branch.
            if not os.path.exists(filepath):
                error_message = f"The file '{filepath}' does not exist."
                self.logger.error(error_message)
                # A plain string cannot be raised; use a real exception type.
                raise FileNotFoundError(error_message)
            os.remove(filepath)
            self.logger.info("Successfully deleted file: %s", filepath)
        except Exception as e:
            self._report_failure(f"An error occurred while deleting the file: {e}")
            raise

delete_file(filepath)

Deletes a file from the filesystem.

Parameters:

Name Type Description Default
filepath str

The complete path of the file to be deleted.

required

Returns:

Type Description
None

None

Raises:

Type Description
Exception

If an error occurs while deleting the file, or if the file does not exist.

Examples:

FileReaderUtil().delete_file("path/to/file.txt")

Source code in libs\cafex_core\src\cafex_core\utils\file_reader_utils.py
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
def delete_file(self, filepath: str) -> None:
    """
    Deletes a file from the filesystem.

    Args:
        filepath (str): The complete path of the file to be deleted.

    Returns:
        None

    Raises:
        FileNotFoundError: If the file does not exist.
        Exception: If an error occurs while deleting the file.

    Examples:
        >> FileReaderUtil().delete_file("path/to/file.txt")
    """
    try:
        # Only the missing-file case is an error; a successful removal must not
        # fall through into the "does not exist" branch.
        if not os.path.exists(filepath):
            error_message = f"The file '{filepath}' does not exist."
            self.logger.error(error_message)
            # A plain string cannot be raised; use a real exception type.
            raise FileNotFoundError(error_message)
        os.remove(filepath)
        self.logger.info("Successfully deleted file: %s", filepath)
    except Exception as e:
        error_description = f"An error occurred while deleting the file: {str(e)}"
        self.__exceptions_generic.raise_generic_exception(
            message=error_description,
            insert_report=True,
            trim_log=True,
            log_local=True,
            fail_test=False,
        )
        raise e

extract_7z_file(zip_file_name, target_dir)

Extracts a 7Z file to the specified target directory.

Parameters:

Name Type Description Default
zip_file_name str

The complete path of the 7Z file to be extracted.

required
target_dir str

The target directory where the files will be extracted.

required

Returns:

Name Type Description
str str

The path of the first extracted file.

Examples:

extracted_file = FileReaderUtil().extract_7z_file("path/to/file.7z", "path/to/extract")

Source code in libs\cafex_core\src\cafex_core\utils\file_reader_utils.py
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
def extract_7z_file(self, zip_file_name: str, target_dir: str) -> str:
    """
    Unpacks a 7Z archive into a target directory.

    Args:
        zip_file_name (str): Full path of the 7Z archive to unpack.
        target_dir (str): Directory that receives the extracted files.

    Returns:
        str: ``target_dir`` joined with the first name listed by the archive.

    Raises:
        EnvironmentError: When the platform reported by ``platform.system()`` is not Windows.
        Exception: When the archive lists no names or extraction fails.

    Examples:
        >> extracted_file = FileReaderUtil().
        extract_7z_file("path/to/file.7z", "path/to/extract")
    """
    running_on_windows = platform.system().upper() == "WINDOWS"
    if not running_on_windows:
        error_message = "7Z extraction is only supported on Windows."
        self.logger.error(error_message)
        raise EnvironmentError(error_message)

    try:
        with py7zr.SevenZipFile(zip_file_name, mode="r") as seven_zip:
            seven_zip.extractall(path=target_dir)
            member_names = seven_zip.getnames()
            if not member_names:
                raise ValueError("No files were extracted from the archive.")
            first_member = member_names[0]
            first_member_path = os.path.join(target_dir, first_member)
            self.logger.info(
                "Successfully extracted 7Z file: %s to %s", zip_file_name, target_dir
            )
            return first_member_path
    except Exception as error:
        # Report through the shared handler, then surface the original error.
        self.__exceptions_generic.raise_generic_exception(
            message=f"The provided file is not supported: {str(error)}",
            insert_report=True,
            trim_log=True,
            log_local=True,
            fail_test=False,
        )
        raise error

read_csv_file(filepath)

Reads a CSV file and returns its content as a list of rows.

Parameters:

Name Type Description Default
filepath str

The complete path of the CSV file to be read.

required

Returns:

Name Type Description
list list

A list of rows from the CSV file.

Examples:

csv_content = FileReaderUtil().read_csv_file("path/to/file.csv")

Source code in libs\cafex_core\src\cafex_core\utils\file_reader_utils.py
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
def read_csv_file(self, filepath: str) -> list:
    """
    Loads every row of a CSV file.

    Args:
        filepath (str): Full path of the CSV file to load.

    Returns:
        list: The rows produced by ``csv.reader``, in file order.

    Raises:
        Exception: When the file cannot be opened or parsed.

    Examples:
        >> csv_content = FileReaderUtil().read_csv_file("path/to/file.csv")
    """
    try:
        # newline='' is passed to open() as the source does for csv reading.
        with open(filepath, "r", newline='', encoding='utf-8') as handle:
            rows = [record for record in csv.reader(handle)]
        self.logger.info("Successfully read CSV file: %s", filepath)
        return rows
    except Exception as error:
        # Report through the shared handler, then surface the original error.
        self.__exceptions_generic.raise_generic_exception(
            message=f"An error occurred while reading the CSV file: {str(error)}",
            insert_report=True,
            trim_log=True,
            log_local=True,
            fail_test=False,
        )
        raise error

read_excel(filepath, sheet_reference=None)

Reads an Excel file and returns the specified sheet's content.

Parameters:

Name Type Description Default
filepath str

The complete path of the Excel file to be read.

required
sheet_reference str or int

The name or index of the sheet to read. If None, the active sheet will be read.

None

Returns:

Name Type Description
list list

The content of the specified sheet.

Examples:

values = FileReaderUtil().read_excel("path/to/file.xlsx", "Sheet1")
values = FileReaderUtil().read_excel("path/to/file.xlsx", 0)  # By index

Source code in libs\cafex_core\src\cafex_core\utils\file_reader_utils.py
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
def read_excel(self, filepath: str, sheet_reference: Union[str, int, None] = None) -> list:
    """
    Loads one worksheet of an Excel workbook as a list of rows.

    Args:
        filepath (str): Full path of the Excel file to load.
        sheet_reference (str or int, optional): Sheet name or zero-based sheet index.
            Any other value (including None) selects the workbook's active sheet.

    Returns:
        list: One inner list of cell values per row of the selected sheet.

    Raises:
        ValueError: When the sheet name is unknown or the index is out of range.
        Exception: When the workbook cannot be loaded or read.

    Examples:
        >> values = FileReaderUtil().read_excel("path/to/file.xlsx", "Sheet1")
        >> values = FileReaderUtil().read_excel("path/to/file.xlsx", 0)  # By index
    """
    for ignored_category in (PendingDeprecationWarning, DeprecationWarning):
        warnings.filterwarnings("ignore", category=ignored_category)

    try:
        book = openpyxl.load_workbook(filepath, data_only=True)
        if isinstance(sheet_reference, str):
            # Guard clause: reject unknown names before indexing the workbook.
            if sheet_reference not in book.sheetnames:
                self.logger.error("Sheet name %s does not exist.", sheet_reference)
                raise ValueError(f"Sheet name '{sheet_reference}' does not exist.")
            selected = book[sheet_reference]
        elif isinstance(sheet_reference, int):
            # Guard clause: negative or too-large indexes are rejected explicitly.
            if not 0 <= sheet_reference < len(book.worksheets):
                self.logger.error("Sheet index %s is out of range.", sheet_reference)
                raise ValueError(f"Sheet index '{sheet_reference}' is out of range.")
            selected = book.worksheets[sheet_reference]
        else:
            selected = book.active
        rows = list(map(list, selected.iter_rows(values_only=True)))
        self.logger.info(
            "Successfully read Excel file: %s, sheet: %s", filepath, selected.title
        )
        return rows
    except Exception as error:
        # Report through the shared handler, then surface the original error.
        self.__exceptions_generic.raise_generic_exception(
            message=f"An error occurred while reading the Excel file: {str(error)}",
            insert_report=True,
            trim_log=True,
            log_local=True,
            fail_test=False,
        )
        raise error

read_pdf_file(filepath)

Reads a PDF file and returns its content as a list of strings.

Parameters:

Name Type Description Default
filepath str

The complete path of the PDF file to be read.

required

Returns:

Name Type Description
list list

A list of strings representing the text content of the PDF.

Examples:

pdf_content = FileReaderUtil().read_pdf_file("path/to/file.pdf")

Source code in libs\cafex_core\src\cafex_core\utils\file_reader_utils.py
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
def read_pdf_file(self, filepath: str) -> list:
    """
    Extracts the text of every page of a PDF file.

    Args:
        filepath (str): Full path of the PDF file; it must end with ``.pdf``.

    Returns:
        list: One string per page, with newlines turned into spaces and carriage
        returns removed.

    Raises:
        ValueError: When ``filepath`` does not end with ``.pdf``.
        Exception: When the document cannot be opened or read.

    Examples:
        >> pdf_content = FileReaderUtil().read_pdf_file("path/to/file.pdf")
    """
    has_pdf_suffix = filepath.endswith('.pdf')
    if not has_pdf_suffix:
        self.logger.error("The provided file is not a PDF.")
        raise ValueError("The provided file is not a PDF.")
    try:
        with fitz.open(filepath) as pdf_document:
            # Flatten each page into a single line of text.
            pages = [
                pdf_page.get_text("text").replace("\n", " ").replace("\r", "")
                for pdf_page in pdf_document
            ]
        self.logger.info("Successfully read PDF file: %s", filepath)
        return pages

    except Exception as error:
        # Report through the shared handler, then surface the original error.
        self.__exceptions_generic.raise_generic_exception(
            message=f"An error occurred while reading the PDF file: {str(error)}",
            insert_report=True,
            trim_log=True,
            log_local=True,
            fail_test=False,
        )
        raise error

read_txt_file(filepath)

Reads the contents of a text file.

Parameters:

Name Type Description Default
filepath str

The complete path of the text file to be read.

required

Returns:

Name Type Description
str str

The contents of the text file.

Raises:

Type Description
Exception

If an error occurs while reading the text file.

Examples:

content = FileReaderUtil().read_txt_file("path/to/file.txt")

Source code in libs\cafex_core\src\cafex_core\utils\file_reader_utils.py
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
def read_txt_file(self, filepath: str) -> str:
    """
    Returns the full contents of a UTF-8 text file.

    Args:
        filepath (str): Full path of the text file to load.

    Returns:
        str: Everything read from the file.

    Raises:
        Exception: When the file cannot be opened or read.

    Examples:
        >> content = FileReaderUtil().read_txt_file("path/to/file.txt")
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as handle:
            text = handle.read()
        self.logger.info("Successfully read text file: %s", filepath)
        return text
    except Exception as error:
        # Report through the shared handler, then surface the original error.
        self.__exceptions_generic.raise_generic_exception(
            message=f"An error occurred while reading the text file: {str(error)}",
            insert_report=True,
            trim_log=True,
            log_local=True,
            fail_test=False,
        )
        raise error

read_word_file(filepath)

Reads a Word file and returns its content as a list of strings.

Parameters:

Name Type Description Default
filepath str

The complete path of the Word file to be read.

required

Returns:

Name Type Description
list list

A list of strings representing the text content of the Word file.

Examples:

word_content = FileReaderUtil().read_word_file("path/to/file.docx")

Source code in libs\cafex_core\src\cafex_core\utils\file_reader_utils.py
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
def read_word_file(self, filepath: str) -> list:
    """
    Extract the text runs of a Word (.docx) file.

    The file is opened as a ZIP archive, its ``word/document.xml`` member is
    decoded as UTF-8 (undecodable bytes are ignored) and parsed, and the text
    of every ``w:t`` element is collected.

    Args:
        filepath (str): The complete path of the Word file to be read.

    Returns:
        list: The text of each ``w:t`` element found, in the order returned
        by the parser.

    Raises:
        Exception: Any error hit while reading or parsing the file is passed
        to the generic exception handler and then re-raised.

    Examples:
        >> word_content = FileReaderUtil().read_word_file("path/to/file.docx")
    """
    try:
        with zipfile.ZipFile(filepath) as archive:
            raw_xml = archive.read("word/document.xml")
        # Decode leniently so stray bytes do not abort the whole read.
        parsed = BeautifulSoup(raw_xml.decode("utf-8", errors="ignore"), features="lxml")
        texts = [node.text for node in parsed.find_all("w:t")]
        self.logger.info("Successfully read Word file: %s", filepath)
        return texts
    except Exception as err:
        # Hand the failure to the generic exception handler (fail_test=False),
        # then re-raise the original error for the caller.
        failure_message = "An error occurred while reading the Word file: " + str(err)
        self.__exceptions_generic.raise_generic_exception(
            message=failure_message,
            insert_report=True,
            trim_log=True,
            log_local=True,
            fail_test=False,
        )
        raise err

unzip_zip_file(source_path, destination_path)

Unzips a ZIP file to the specified destination path.

Parameters:

Name Type Description Default
source_path str

The complete path of the ZIP file to be unzipped.

required
destination_path str

The destination path where the files will be extracted.

required

Returns:

Type Description
None

None

Examples:

FileReaderUtil().unzip_zip_file("path/to/file.zip", "path/to/extract")

Source code in libs\cafex_core\src\cafex_core\utils\file_reader_utils.py
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
def unzip_zip_file(self, source_path: str, destination_path: str) -> None:
    """
    Unzips a ZIP file to the specified destination path.

    The destination directory (including missing parent directories) is
    created when it does not exist yet; an already existing directory is
    reused as-is.

    Args:
        source_path (str): The complete path of the ZIP file to be unzipped.
        destination_path (str): The destination path where the files will be extracted.

    Returns:
        None

    Raises:
        Exception: Any error hit while creating the destination directory or
        extracting the archive is passed to the generic exception handler
        and then re-raised.

    Examples:
        >> FileReaderUtil().unzip_zip_file("path/to/file.zip", "path/to/extract")
    """
    try:
        # exist_ok=True avoids the check-then-create race of a separate
        # os.path.exists() test when another process creates the directory
        # in between.
        os.makedirs(destination_path, exist_ok=True)
        with zipfile.ZipFile(source_path, 'r') as zip_file:
            zip_file.extractall(destination_path)
        self.logger.info("Successfully unzipped ZIP file: %s to %s",
                         source_path, destination_path)
    except Exception as e:
        # Hand the failure to the generic exception handler (fail_test=False),
        # then re-raise the original error for the caller.
        error_description = f"The provided file is not supported: {str(e)}"
        self.__exceptions_generic.raise_generic_exception(
            message=error_description,
            insert_report=True,
            trim_log=True,
            log_local=True,
            fail_test=False,
        )
        raise

write_csv_file(filepath, data)

    Writes data to a CSV file.

    Args:
        filepath (str): The complete path of the CSV file to be written.
        data (list of list): The data to write to the CSV file, where each inner list
        represents a row.

    Returns:
        None

    Raises:
        Exception: If an error occurs while writing to the CSV file.

    Examples:
        >> data = [['Name', 'Age'], ['John', 30], ['Jane', 25]]
        >> FileReaderUtil().write_csv_file("path/to/file.csv", data)

Source code in libs\cafex_core\src\cafex_core\utils\file_reader_utils.py
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
    def write_csv_file(self, filepath: str, data: list[list]) -> None:
        """
        Write rows of data to a CSV file, replacing any existing content.

        Args:
            filepath (str): The complete path of the CSV file to be written.
            data (list of list): The rows to write; each inner list becomes
            one row of the CSV file.

        Returns:
            None

        Raises:
            Exception: Any error hit while writing the file is passed to the
            generic exception handler and then re-raised.

        Examples:
            >> data = [['Name', 'Age'], ['John', 30], ['Jane', 25]]
            >> FileReaderUtil().write_csv_file("path/to/file.csv", data)
        """
        try:
            # newline='' leaves line endings to the csv writer.
            with open(filepath, mode='w', newline='', encoding='utf-8') as handle:
                csv.writer(handle).writerows(data)
            self.logger.info("Successfully wrote to CSV file: %s", filepath)
        except Exception as err:
            # Hand the failure to the generic exception handler
            # (fail_test=False), then re-raise the original error.
            failure_message = "An error occurred while writing to the CSV file: " + str(err)
            self.__exceptions_generic.raise_generic_exception(
                message=failure_message,
                insert_report=True,
                trim_log=True,
                log_local=True,
                fail_test=False,
            )
            raise err

write_excel_file(filepath, data)

Writes data to an Excel file.

Parameters:

Name Type Description Default
filepath str

The complete path of the Excel file to be written.

required
data list of dict

The data to write to the Excel file.

required

Returns:

Type Description
None

None

Raises:

Type Description
Exception

If an error occurs while writing to the Excel file.

Examples:

data = [{'Name': 'John', 'Age': 30}, {'Name': 'Jane', 'Age': 25}] FileReaderUtil().write_excel_file("path/to/file.xlsx", data)

Source code in libs\cafex_core\src\cafex_core\utils\file_reader_utils.py
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
def write_excel_file(self, filepath: str, data: list[dict]) -> None:
    """
    Write records to an Excel file via a pandas DataFrame.

    The DataFrame index is not written to the file.

    Args:
        filepath (str): The complete path of the Excel file to be written.
        data (list of dict): The data to write to the Excel file.

    Returns:
        None

    Raises:
        Exception: Any error hit while building the DataFrame or writing the
        file is passed to the generic exception handler and then re-raised.

    Examples:
        >> data = [{'Name': 'John', 'Age': 30}, {'Name': 'Jane', 'Age': 25}]
        >> FileReaderUtil().write_excel_file("path/to/file.xlsx", data)
    """
    try:
        pd.DataFrame(data).to_excel(filepath, index=False)
        self.logger.info("Successfully wrote to Excel file: %s", filepath)
    except Exception as err:
        # Hand the failure to the generic exception handler (fail_test=False),
        # then re-raise the original error for the caller.
        failure_message = "An error occurred while writing to the Excel file: " + str(err)
        self.__exceptions_generic.raise_generic_exception(
            message=failure_message,
            insert_report=True,
            trim_log=True,
            log_local=True,
            fail_test=False,
        )
        raise err

write_txt_file(filepath, content)

Writes content to a text file.

Parameters:

Name Type Description Default
filepath str

The complete path of the text file to be written.

required
content str

The content to write to the text file.

required

Returns:

Type Description
None

None

Raises:

Type Description
Exception

If an error occurs while writing to the text file.

Examples:

FileReaderUtil().write_txt_file("path/to/file.txt", "Hello, World!")

Source code in libs\cafex_core\src\cafex_core\utils\file_reader_utils.py
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
def write_txt_file(self, filepath: str, content: str) -> None:
    """
    Write a string to a UTF-8 text file, replacing any existing content.

    Args:
        filepath (str): The complete path of the text file to be written.
        content (str): The content to write to the text file.

    Returns:
        None

    Raises:
        Exception: Any error hit while opening or writing the file is passed
        to the generic exception handler and then re-raised.

    Examples:
        >> FileReaderUtil().write_txt_file("path/to/file.txt", "Hello, World!")
    """
    try:
        with open(filepath, mode='w', encoding='utf-8') as handle:
            handle.write(content)
        # Log only after the file has been closed by the context manager.
        self.logger.info("Successfully wrote to text file: %s", filepath)
    except Exception as err:
        # Hand the failure to the generic exception handler (fail_test=False),
        # then re-raise the original error for the caller.
        failure_message = "An error occurred while writing to the text file: " + str(err)
        self.__exceptions_generic.raise_generic_exception(
            message=failure_message,
            insert_report=True,
            trim_log=True,
            log_local=True,
            fail_test=False,
        )
        raise err

write_word_file(filepath, content)

Writes content to a Word file.

Parameters:

Name Type Description Default
filepath str

The complete path of the Word file to be written.

required
content list of str

The content to write to the Word file, where each string is a paragraph.

required

Returns:

Type Description
None

None

Raises:

Type Description
Exception

If an error occurs while writing to the Word file.

Examples:

content = ["First paragraph.", "Second paragraph."] FileReaderUtil().write_word_file("path/to/file.docx", content)

Source code in libs\cafex_core\src\cafex_core\utils\file_reader_utils.py
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
def write_word_file(self, filepath: str, content: list[str]) -> None:
    """
    Write a list of strings to a Word file, one paragraph per string.

    Args:
        filepath (str): The complete path of the Word file to be written.
        content (list of str): The content to write to the Word file, where each
        string is a paragraph.

    Returns:
        None

    Raises:
        Exception: Any error hit while building or saving the document is
        passed to the generic exception handler and then re-raised.

    Examples:
        >> content = ["First paragraph.", "Second paragraph."]
        >> FileReaderUtil().write_word_file("path/to/file.docx", content)
    """
    try:
        document = Document()
        for text in content:
            document.add_paragraph(text)
        document.save(filepath)
        self.logger.info("Successfully wrote to Word file: %s", filepath)
    except Exception as err:
        # Hand the failure to the generic exception handler (fail_test=False),
        # then re-raise the original error for the caller.
        failure_message = "An error occurred while writing to the Word file: " + str(err)
        self.__exceptions_generic.raise_generic_exception(
            message=failure_message,
            insert_report=True,
            trim_log=True,
            log_local=True,
            fail_test=False,
        )
        raise err