Add Validator, fix default handling of column conversion, statistics stores charset for comparison
This commit is contained in:
parent
0194711016
commit
ab30cb57ee
10
README.md
10
README.md
@ -15,8 +15,14 @@ If an error occurs during the conversion of a table or column, an output with th
|
||||
## Usage
|
||||
|
||||
```
|
||||
python convert.py [-h] [-v] [-s] -H HOST -P PORT -u USER -p PASSWORD -d DATABASE
|
||||
python convert.py [-h] [-v] [-s | -V] -H HOST -P PORT -u USER -p PASSWORD -d DATABASE
|
||||
```
|
||||
|
||||
Options:
|
||||
- `-h/--help`
|
||||
- `-s/--statistics`
|
||||
- `-V/--validate`
|
||||
|
||||
Required arguments:
|
||||
- `-H/--host HOST`
|
||||
- `-P/--port PORT`
|
||||
@ -25,6 +31,4 @@ Required arguments:
|
||||
- `-d/--database DATABASE`
|
||||
|
||||
Optional arguments:
|
||||
- `-h/--help`
|
||||
- `-v/--verbose`
|
||||
- `-s/--statistics`
|
29
convert.py
29
convert.py
@ -3,8 +3,10 @@
|
||||
|
||||
import logging
|
||||
import argparse
|
||||
from json import dumps
|
||||
from convert.validation import Validation
|
||||
from convert.statistics import Statistics
|
||||
from convert.utf8mb4converter import UTF8MB4Converter
|
||||
from convert.utf8mb4converter import UTF8MB4Converter, DEFAULT_CHARSET
|
||||
|
||||
def main (
|
||||
args: argparse.Namespace
|
||||
@ -14,11 +16,11 @@ def main (
|
||||
or converts the database itself, all tables and all text fields to utf8mb4 if they don't already
|
||||
have this character set.
|
||||
|
||||
Params:
|
||||
Parameters:
|
||||
- args (argparse.Namespace)
|
||||
- Contains arguments passed to the program
|
||||
"""
|
||||
|
||||
logger: logging.Logger = logging.getLogger("Main")
|
||||
db: UTF8MB4Converter = UTF8MB4Converter (
|
||||
user = args.user,
|
||||
password = args.password,
|
||||
@ -28,12 +30,16 @@ def main (
|
||||
)
|
||||
|
||||
if args.statistics:
|
||||
stats = Statistics(db)
|
||||
logging.getLogger("Main").info(f"Database statistics:\n{stats}")
|
||||
stats: Statistics = Statistics(db)
|
||||
logger.info(f"Database statistics:\n{stats}")
|
||||
|
||||
elif args.validate:
|
||||
validator = Validation(db)
|
||||
validation: dict = validator.convert_validate()
|
||||
logger.info(f"Database conversion validation:\n{dumps(validation, indent=4)}")
|
||||
|
||||
else:
|
||||
db.convert_charset_db()
|
||||
db.convert_charset_all_columns_all_tables()
|
||||
db.convert_charset_all_tables()
|
||||
db.convert_charset_all()
|
||||
|
||||
def parse_args (
|
||||
) -> argparse.Namespace:
|
||||
@ -41,15 +47,18 @@ def parse_args (
|
||||
Parses the arguments passed to the program.
|
||||
|
||||
Returns:
|
||||
- An argparse namespace containing the parsed arguments
|
||||
- An argparse namespace containing the parsed arguments
|
||||
"""
|
||||
|
||||
argparser: argparse.ArgumentParser = argparse.ArgumentParser()
|
||||
args_opt: argparse._ArgumentGroup = argparser.add_argument_group("Optional Arguments")
|
||||
args_req: argparse._ArgumentGroup = argparser.add_argument_group("Required Arguments")
|
||||
args_exc: argparse._MutuallyExclusiveGroup = argparser.add_mutually_exclusive_group()
|
||||
|
||||
args_opt.add_argument("-v", "--verbose", action="store_true")
|
||||
args_opt.add_argument("-s", "--statistics", action="store_true")
|
||||
|
||||
args_exc.add_argument("-s", "--statistics", action="store_true")
|
||||
args_exc.add_argument("-V", "--validate", action="store_true")
|
||||
|
||||
args_req.add_argument("-H", "--host", required=True)
|
||||
args_req.add_argument("-P", "--port", required=True, type=int)
|
||||
|
@ -14,22 +14,29 @@ class Statistics:
|
||||
- The converter object storing the database information and connection
|
||||
- data (dict)
|
||||
- A dictionary holding the generated data: Number of tables & columns and character set overview
|
||||
- charset (str):
|
||||
- A string storing the target charset
|
||||
"""
|
||||
|
||||
def __init__ (
|
||||
self,
|
||||
dbcon: UTF8MB4Converter
|
||||
dbcon: UTF8MB4Converter,
|
||||
charset: str = DEFAULT_CHARSET
|
||||
) -> None:
|
||||
"""
|
||||
Constructor of Statistics object. Generates statistics at creation.
|
||||
|
||||
Parameters:
|
||||
- dbcon (UTF8MB4Converter)
|
||||
- The converter object storing the database information and connection
|
||||
- The converter object storing the database information and connection
|
||||
- charset (str):
|
||||
- the target charset for comparison
|
||||
- default: DEFAULT_CHARSET from class UTF8MB4Converter
|
||||
"""
|
||||
|
||||
self.dbcon = dbcon
|
||||
self.data: dict = None
|
||||
self.charset = charset
|
||||
self.update_stats()
|
||||
|
||||
def __str__ (
|
||||
@ -101,12 +108,12 @@ class Statistics:
|
||||
},
|
||||
"converted": {
|
||||
"tables": {
|
||||
"converted": charset_tab[DEFAULT_CHARSET],
|
||||
"missing": count_tab - charset_tab[DEFAULT_CHARSET]
|
||||
"converted": charset_tab[self.charset],
|
||||
"missing": count_tab - charset_tab[self.charset]
|
||||
},
|
||||
"columns": {
|
||||
"converted": charset_col[DEFAULT_CHARSET],
|
||||
"missing": count_col - charset_col[DEFAULT_CHARSET] - charset_col[None]
|
||||
"converted": charset_col[self.charset],
|
||||
"missing": count_col - charset_col[self.charset] - charset_col[None]
|
||||
}
|
||||
}
|
||||
}
|
@ -291,12 +291,9 @@ class UTF8MB4Converter:
|
||||
self.logger.debug(f"Column {col}(@{table}) already has character set {charset}")
|
||||
return
|
||||
|
||||
if column['nullable'] == "YES":
|
||||
constraint = "NULL"
|
||||
else:
|
||||
constraint = "NOT NULL"
|
||||
if column['dvalue'] is not None:
|
||||
constraint += f" DEFAULT {column['dvalue']}"
|
||||
constraint = "NULL" if column["nullable"] == "YES" else "NOT NULL"
|
||||
if column['dvalue'] is not None:
|
||||
constraint += f" DEFAULT {column['dvalue']}"
|
||||
|
||||
query = " ".join((
|
||||
f"ALTER TABLE {table} CHANGE {col} {col}",
|
||||
@ -355,4 +352,25 @@ class UTF8MB4Converter:
|
||||
|
||||
tables = self.get_tables()
|
||||
for table in tables:
|
||||
self.convert_charset_all_columns_single_table(table, charset, collation)
|
||||
self.convert_charset_all_columns_single_table(table, charset, collation)
|
||||
|
||||
def convert_charset_all (
    self,
    charset: str = DEFAULT_CHARSET,
    collation: str = DEFAULT_COLLATION
) -> None:
    """
    Runs the complete conversion: first the database itself, then all columns
    of all tables, and finally all tables, each with the same target character
    set and collation.

    Parameters:
        - charset (str)
            - target character set
            - default value: DEFAULT_CHARSET (documented as utf8mb4)
        - collation (str)
            - target collation
            - default value: DEFAULT_COLLATION (documented as utf8mb4_unicode_520_ci)
    """

    # The stages run strictly in this order: database -> columns -> tables.
    stages = (
        self.convert_charset_db,
        self.convert_charset_all_columns_all_tables,
        self.convert_charset_all_tables,
    )
    for stage in stages:
        stage(charset, collation)
|
190
convert/validation.py
Normal file
190
convert/validation.py
Normal file
@ -0,0 +1,190 @@
|
||||
# SPDX-License-Identifier: MIT
|
||||
# Copyright (c) 2023 Akumatic
|
||||
|
||||
from collections import defaultdict
|
||||
from convert.utf8mb4converter import UTF8MB4Converter
|
||||
|
||||
class MissingStateException(Exception):
    """
    Raised by a validation object when a state that is required for the
    requested operation has not been stored yet.
    """
|
||||
|
||||
class Validation:
    """
    Class for validating the conversion of the database. The state of the database
    before and after conversion is queried and compared. Deviations are shown.

    Attributes:
        - dbcon (UTF8MB4Converter)
            - The converter object storing the database information and connection
        - start (defaultdict)
            - A dictionary holding the state of the database, before conversion.
            - None until generate_start_state has been called.
        - end (defaultdict)
            - A dictionary holding the state of the database, after conversion.
            - None until generate_end_state has been called.
    """

    # Column schema fields that are skipped when two column schemas are compared,
    # because the character set conversion itself changes them.
    _IGNORED_KEYS = frozenset((
        "CHARACTER_SET_NAME",
        "COLLATION_NAME",
        "CHARACTER_OCTET_LENGTH",
    ))

    def __init__ (
        self,
        dbcon: "UTF8MB4Converter"
    ) -> None:
        """
        Constructor of Validation object. Only stores the converter; no state is
        fetched until generate_start_state / generate_end_state is called.

        Parameters:
            - dbcon (UTF8MB4Converter)
                - The converter object storing the database information and connection
        """

        self.dbcon: "UTF8MB4Converter" = dbcon
        self.start: defaultdict = None
        self.end: defaultdict = None

    def generate_start_state (
        self
    ) -> None:
        """
        Fetches current column schema of the database and stores it in the start attribute.
        """

        self.start = self._get_state()

    def generate_end_state (
        self
    ) -> None:
        """
        Fetches current column schema of the database and stores it in the end attribute.
        """

        self.end = self._get_state()

    def compare_states (
        self
    ) -> dict:
        """
        Compares start and end state and collects information about changed schemas.
        generate_start_state and generate_end_state should be called first.

        Only tables and columns present in the start state are compared. A column
        that exists in the start state but not in the end state is counted as
        altered and reported under the key "COLUMN_NAME" with an "After" of None.
        Neither stored state is modified by the comparison.

        Returns:
            - A dict with the keys "summary" (numbers of unaltered and altered columns)
              and "details" (per table and column, the mismatched fields).

        Raises:
            - MissingStateException
                - Raised when either start state or end state is not set
        """

        if self.start is None:
            raise MissingStateException("No start state stored. Make sure to call generate_start_state")
        if self.end is None:
            raise MissingStateException("No end state stored. Make sure to call generate_end_state")

        summary: dict = {"unaltered": 0, "altered": 0}
        details: defaultdict = defaultdict(dict)

        for table, start_columns in self.start.items():
            # .get instead of indexing: indexing a defaultdict would insert an
            # empty entry into the end state for every table it does not contain.
            end_columns: dict = self.end.get(table, {})
            for column, before in start_columns.items():
                after = end_columns.get(column)
                if after is None:
                    # Column vanished between the two snapshots.
                    comp: dict = {"COLUMN_NAME": {"Before": column, "After": None}}
                else:
                    comp = self._get_differences(before, after)

                if comp:
                    summary["altered"] += 1
                    details[table][column] = comp
                else:
                    summary["unaltered"] += 1

        return {"summary": summary, "details": details}

    def convert_validate (
        self
    ) -> dict:
        """
        Alters the charset and collation of the database, all columns and all tables.
        Validates that no other field was changed.

        Returns:
            - A dict containing a numeric summary and details about mismatched columns.
        """

        self.generate_start_state()
        self.dbcon.convert_charset_all()
        self.generate_end_state()
        return self.compare_states()

    def _get_differences (
        self,
        a: dict,
        b: dict
    ) -> dict:
        """
        Compares the values of two given column data sets for all keys of the first
        one, except for the fields changed by character set conversion
        (CHARACTER_SET_NAME, COLLATION_NAME and CHARACTER_OCTET_LENGTH).
        Stores before and after value for each deviating key.

        Parameters:
            - a (dict)
                - A dictionary containing the information schema of a column.
            - b (dict)
                - A dictionary containing the information schema of a column
                - Used for comparison with dictionary a

        Returns:
            - A dict with the keys and values of deviations between the two given dicts,
              ignoring certain values changed by character set conversion. A key that
              is missing in b is reported as a deviation with an "After" of None.
        """

        data: dict = {}
        for key, before in a.items():
            if key in self._IGNORED_KEYS:
                continue
            # A key absent from b is a deviation, not a reason to crash.
            if key not in b or before != b[key]:
                data[key] = {
                    "Before": before,
                    "After": b.get(key)
                }
        return data

    def _get_columns_of_table (
        self,
        table: str
    ) -> list:
        """
        Fetches all information about the columns of a given table.

        Parameters:
            - table (str)
                - the table whose columns are to be retrieved

        Returns:
            - A list of dicts, containing the full column information.
        """

        # NOTE(review): database and table name are interpolated directly into the
        # SQL text. Consider parameter binding if the driver in use supports it.
        query = " ".join((
            "SELECT * FROM information_schema.COLUMNS",
            f"WHERE table_schema = '{self.dbcon.db}' AND table_name = '{table}'"
        ))
        self.dbcon.kcursor.execute(query)
        return self.dbcon.kcursor.fetchall()

    def _get_state (
        self
    ) -> defaultdict:
        """
        Fetches column schema of the database and stores it for each table and column.

        Returns:
            - A defaultdict that contains one dictionary for each table.
              Each table dictionary contains one dict per column, which contains the column schema.
        """

        state: defaultdict = defaultdict(dict)
        for table in self.dbcon.get_tables():
            for column in self._get_columns_of_table(table):
                state[table][column["COLUMN_NAME"]] = column
        return state
|
Loading…
x
Reference in New Issue
Block a user