# File: db-UTF8-MB3-to-MB4/convert/statistics.py (119 lines, 3.5 KiB, Python)

# SPDX-License-Identifier: MIT
# Copyright (c) 2023 Akumatic
from json import dumps
from collections import defaultdict
from .utf8mb4converter import UTF8MB4Converter, DEFAULT_CHARSET
class Statistics:
    """
    Class for creating statistics that give an overview of the character set status of the database.

    Attributes:
    - dbcon (UTF8MB4Converter)
        - The converter object storing the database information and connection
    - data (dict)
        - A dictionary holding the generated data: number of tables & columns,
          character set overview and conversion progress towards the target charset
    - charset (str)
        - A string storing the target charset
    """

    def __init__(
        self,
        dbcon: UTF8MB4Converter,
        charset: str = DEFAULT_CHARSET
    ) -> None:
        """
        Constructor of Statistics object. Generates statistics at creation.

        Parameters:
        - dbcon (UTF8MB4Converter)
            - The converter object storing the database information and connection
        - charset (str)
            - the target charset for comparison
            - default: DEFAULT_CHARSET imported from the utf8mb4converter module
        """
        self.dbcon = dbcon
        self.charset = charset
        # Start with an empty dict so the attribute matches its annotation;
        # update_stats() replaces it with the real data right away.
        self.data: dict = {}
        self.update_stats()

    def __str__(self) -> str:
        """
        String representation method, used when the object is passed to print() or str().

        Returns:
        - A string representation of the stored data, as produced by get_data_formatted_str
        """
        return self.get_data_formatted_str()

    def get_data_formatted_str(
        self,
        indent: int = 4
    ) -> str:
        """
        Formats the stored data as JSON for better readability.

        Parameters:
        - indent (int)
            - number of spaces for indentation
            - default value: 4

        Returns:
        - JSON string representation of the data stored in the object
        """
        return dumps(self.data, indent=indent)

    def update_stats(self) -> None:
        """
        Fetches data from the database and stores it in the object.

        The result is written to self.data with three sections:
        - "count": total number of tables and columns
        - "charset": number of tables / columns per character set
        - "converted": how many tables / columns already use the target charset
          and how many are still missing
        """
        tables = self.dbcon.get_tables()

        # totals of tables and columns
        count_tab = len(tables)
        count_col = 0

        # occurrences of each charset among tables and columns
        charset_tab = defaultdict(int)
        charset_col = defaultdict(int)

        for table in tables:
            charset_tab[self.dbcon.get_charset_table(table)["charset"]] += 1
            columns = self.dbcon.get_columns_of_table(table)
            count_col += len(columns)
            for column in columns:
                charset_col[column["charset"]] += 1

        # Read the totals with .get(): indexing a defaultdict inserts the key when
        # it is absent, which would add zero-count entries (e.g. a None/"null" key)
        # to the very overview that is stored and reported below.
        converted_tab = charset_tab.get(self.charset, 0)
        converted_col = charset_col.get(self.charset, 0)
        # Columns reported with charset None are excluded from "missing"
        # (presumably columns that carry no character set at all - confirm against
        # get_columns_of_table).
        no_charset_col = charset_col.get(None, 0)

        # store generated data into the object itself
        self.data = {
            "count": {
                "tables": count_tab,
                "columns": count_col,
            },
            "charset": {
                # kept as defaultdicts so indexing an absent charset still yields 0
                "tables": charset_tab,
                "columns": charset_col,
            },
            "converted": {
                "tables": {
                    "converted": converted_tab,
                    "missing": count_tab - converted_tab,
                },
                "columns": {
                    "converted": converted_col,
                    "missing": count_col - converted_col - no_charset_col,
                },
            },
        }