From 2673c92b4e6ac530ef2c6eca901bb7611ea7360d Mon Sep 17 00:00:00 2001 From: Akumatic Date: Thu, 4 May 2023 10:41:12 +0200 Subject: [PATCH] Add functionality to generate an overview of the db --- convert.py | 19 ++++--- convert/statistics.py | 112 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 125 insertions(+), 6 deletions(-) create mode 100644 convert/statistics.py diff --git a/convert.py b/convert.py index fec51cd..d687338 100644 --- a/convert.py +++ b/convert.py @@ -3,15 +3,16 @@ import logging import argparse +from convert.statistics import Statistics from convert.utf8mb4converter import UTF8MB4Converter def main ( args: argparse.Namespace ) -> None: """ - Main program sequence. Establishes a connection to the database, converts - the default charset and collation of the database itself, all tables and - all text fields to UTF8MB4 if its not in UTF8MB4 yet. + Main program sequence. Establishes a connection to the database and either creates statistics + or converts the database itself, all tables and all text fields to utf8mb4 if they don't already + have this character set. Params: - args (argparse.Namespace) @@ -25,9 +26,14 @@ def main ( port = args.port, db = args.database ) - db.convert_charset_db() - db.convert_charset_all_tables() - db.convert_charset_all_columns_all_tables() + + if args.statistics: + stats = Statistics(db) + logging.getLogger("Main").info(f"Database statistics:\n{stats}") + else: + db.convert_charset_db() + db.convert_charset_all_tables() + db.convert_charset_all_columns_all_tables() def parse_args ( ) -> argparse.Namespace: @@ -43,6 +49,7 @@ def parse_args ( args_req: argparse._ArgumentGroup = argparser.add_argument_group("Required Arguments") args_opt.add_argument("-v", "--verbose", action="store_true") + args_opt.add_argument("-s", "--statistics", action="store_true") args_req.add_argument("-H", "--host", required=True) args_req.add_argument("-P", "--port", required=True, type=int) diff --git a/convert/statistics.py b/convert/statistics.py new file mode 100644 index 0000000..2db98fc --- /dev/null +++ b/convert/statistics.py @@ -0,0 +1,112 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2023 Akumatic + +from json import dumps +from collections import defaultdict +from .utf8mb4converter import UTF8MB4Converter, DEFAULT_CHARSET + +class Statistics: + """ + Class for creating statistics that give an overview of the character set status of the database + + Attributes: + - dbcon (UTF8MB4Converter) + - The converter object storing the database information and connection + - data (dict) + - A dictionary holding the generated data: Number of tables & columns and character set overview + """ + + def __init__ ( + self, + dbcon: UTF8MB4Converter + ) -> None: + """ + Constructor of Statistics object. Generates statistics at creation. + + Parameters: + - dbcon (UTF8MB4Converter) + - The converter object storing the database information and connection + """ + + self.dbcon = dbcon + self.data: dict = None + self.update_stats() + + def __str__ ( + self + ) -> str: + """ + String representation method. Prints the data stored if the object is passed to print() or str() + + Returns: + - A string representation of the data stored in the object by calling get_data_formatted_str + """ + + return self.get_data_formatted_str() + + def get_data_formatted_str ( + self, + indent: int = 4 + ) -> str: + """ + Formats the stored data for better readability. + + Parameters: + - indent (int) + - number of spaces for indentation + - default value: 4 + + Returns: + - String representation of the data stored in the object + """ + + return dumps(self.data, indent=indent) + + def update_stats ( + self + ) -> None: + """ + Fetches data from the database and stores it in the object. + """ + + tables = self.dbcon.get_tables() + + # store dict with count of tables and columns + count_tab = len(tables) + count_col = 0 + + # store dict with count of different charsets of tables and columns + charset_tab = defaultdict(int) + charset_col = defaultdict(int) + + # fill prepared dicts with data + for table in tables: + charset = self.dbcon.get_charset_table(table)["charset"] + charset_tab[charset] += 1 + + columns = self.dbcon.get_columns_of_table(table) + count_col += len(columns) + for column in columns: + charset_col[column["charset"]] += 1 + + # store generated data into the object itself + self.data = { + "count": { + "tables": count_tab, + "columns": count_col + }, + "charset": { + "tables": charset_tab, + "columns": charset_col + }, + "converted": { + "tables": { + "converted": charset_tab[DEFAULT_CHARSET], + "missing": count_tab - charset_tab[DEFAULT_CHARSET] + }, + "columns": { + "converted": charset_col[DEFAULT_CHARSET], + "missing": count_col - charset_col[DEFAULT_CHARSET] - charset_col[None] + } + } + } \ No newline at end of file