# ExamScan/scan/io.py

# SPDX-License-Identifier: MIT
# Copyright (c) 2019 Akumatic

import cv2, argparse, numpy

##############
# Exceptions #
##############

class UnsupportedFileType(Exception):
    """ Signals that the input is neither a png/jpeg image nor a pdf. """

class UnsupportedNumberOfPages(Exception):
    """ Signals that a pdf does not consist of exactly one page. """

class MissingArg(Exception):
    """ Signals that a required argument was not provided. """

####################
# Argument Parsing #
####################

def parse_args (
        args: list = None
    ) -> argparse.Namespace:
    """ Parser for cli arguments.

    Args:
        args (list of str, optional):
            The argument strings to parse. If omitted (None), argparse
            falls back to the command line of the running process, which
            is what this function always did before the parameter existed.

    Returns:
        A Namespace containing all parsed data
    """
    # The parser itself. The default help option is disabled so it can be
    # re-added manually to the "optional arguments" group below.
    parser = argparse.ArgumentParser(
        add_help=False,
        description="Evaluates single choice sheets"
    )

    # Groups for ordering arguments in help command
    grp_req_excl = parser.add_argument_group("required arguments, mutually exclusive")
    grp_req = parser.add_argument_group("required arguments")
    grp_opt = parser.add_argument_group("optional arguments")

    #########################
    ##### Required Args #####
    #########################

    # Input path - either an url or a path to a local file
    io_grp = grp_req_excl.add_mutually_exclusive_group(required=True)
    io_grp.add_argument("-u", "--url", dest="url",
        help="URL to the image or pdf to be evaluated.")
    io_grp.add_argument("-f", "--file", dest="file",
        help="path to the image or pdf to be evaluated.")

    # required arg for the number of answers per question
    grp_req.add_argument("-n", "--num", dest="num", required=True,
        type=_arg_int_pos, help="number of answers per question")

    #########################
    ##### Optional Args #####
    #########################

    # help message. Added manually so it is shown under optional
    grp_opt.add_argument("-h", "--help", action="help", help="show this help message and exit")

    # path to store the result picture to
    grp_opt.add_argument("-i", "--iout", dest="iout",
        help="path for the output picture to be stored.")

    # path to store the result list to
    grp_opt.add_argument("-d", "--dout", dest="dout",
        help="path for the output data to be stored.")

    # path to compare results generated by the program with data stored in a file
    grp_opt.add_argument("-c", "--compare", dest="comp",
        help="compares the calculated result to a given result")

    # plotting all steps
    grp_opt.add_argument("-p", "--plot", dest="plot", action="store_true",
        help="plots every single step")

    return parser.parse_args(args)

def _arg_int_pos (
        value: str
    ) -> int:
    """ Trying to parse the input argument into a positive value.

    Args:
        value (str):
            The value to be checked

    Returns:
        The checked and casted value

    Raises:
        ArgumentTypeError:
            Raises an ArgumentTypeError if
            - value is not a number
            - value is not positive
    """
    try:
        int_value = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(f"{value} is not a number.")

    if int_value <= 0:
        raise argparse.ArgumentTypeError(f"{value} is not a positive integer.")
    return int_value

#################
# File Handling #
#################

def _mime_type (
        content_type: str
    ) -> str:
    """ Reduces a Content-Type style value to its bare mime type.

    Parameters after a semicolon (e.g. "; charset=binary") are dropped,
    surrounding whitespace is removed and the result is lower-cased.
    """
    return content_type.split(";")[0].strip().lower()

def read_image (
        path: str = None,
        url: str = None
    ) -> numpy.ndarray:
    """ Opens the image if the file type is supported.

    Supported are png and jpeg images as well as pdf files with exactly
    one page. The type is not derived from the file name: for an url the
    Content-Type header of the response is used, for a local file the
    content is inspected with `magic`.
    If both url and path are given, the url is used.

    Args:
        path (String): Path to local file
        url (String): URL to file

    Returns:
        A ndarray containing the image data.
        For png/jpeg this is whatever cv2.imdecode returns for the data;
        pdf pages are converted from RGB to BGR before being returned.

    Raises:
        MissingArg:
            Neither url nor path provided
        UnsupportedFileType:
            The detected mime type is not png, jpeg or pdf.
            The mime type is passed as exception argument.
        UnsupportedNumberOfPages:
            The pdf does not consist of exactly one page
        requests.exceptions.ConnectionError, FileNotFoundError:
            Errors of the underlying download / file access are
            not handled here and propagate to the caller.
    """
    # Neither URL nor path provided
    if url is None and path is None:
        raise MissingArg

    image_types = ("image/png", "image/jpeg")
    pdf_type = "application/pdf"

    # Get data and mime type
    if url:
        import requests
        # stream=True defers the download of the body until .content is
        # accessed, so nothing is fetched for unsupported types. The with
        # block releases the connection in every case.
        with requests.get(url, stream=True) as response:
            # A missing header results in "" => UnsupportedFileType below
            mime = _mime_type(response.headers.get("Content-Type", ""))
            if mime in image_types or mime == pdf_type:
                # .content (unlike .raw) has gzip/deflate transfer
                # encodings already decoded
                raw = response.content

    else: # path
        import magic
        with open(path, "rb") as f:
            raw = f.read()
        mime = _mime_type(magic.from_buffer(raw, mime=True))

    # If file is image or pdf, parse to cv2
    if mime in image_types:
        data = numpy.asarray(bytearray(raw))
        return cv2.imdecode(data, cv2.IMREAD_COLOR)

    # Plain comparison on purpose: `mime in ("application/pdf")` would be a
    # substring test on a str and also accept "", "pdf" or "application".
    if mime == pdf_type:
        import pdf2image
        images = pdf2image.convert_from_bytes(raw)

        # only pdf with one page are supported
        if len(images) != 1:
            raise UnsupportedNumberOfPages

        data = numpy.asarray(images[0])
        return cv2.cvtColor(data, cv2.COLOR_RGB2BGR)

    raise UnsupportedFileType(mime)

def write_image (
        image: numpy.ndarray,
        path: str,
    ):
    """ Stores an image to the given path.
        If the directory does not exist, it tries to create it.

    Args:
        image (ndarray):
            the image to be stored
        path (String): Path for file to be stored

    Raises:
        OSError:
            If the target directory can't be created, e.g. because of
            missing permissions or because a regular file with the
            same name already exists.
    """
    import os
    path_abs = os.path.abspath(path)

    # exist_ok=True replaces the former "check, then create" sequence, which
    # failed if the directory was created by someone else in between.
    os.makedirs(os.path.dirname(path_abs), exist_ok=True)

    # NOTE(review): the return value of cv2.imwrite is not evaluated here,
    # so a failed write is not reported to the caller - confirm if intended.
    cv2.imwrite(path_abs, image)

def load_results (
        path: str
    ):
    """ Loads previously stored results from a JSON file.

    Args:
        path (String): Path to the JSON file to be read

    Returns:
        The deserialized content of the file

    Raises:
        FileNotFoundError:
            If the file does not exist
        json.JSONDecodeError:
            If the file does not contain valid JSON
    """
    import json
    # JSON text is UTF-8 by specification. Without an explicit encoding the
    # platform default would be used, which breaks non-ASCII content on
    # systems with a different locale encoding.
    with open(path, "r", encoding="utf-8") as f:
        return json.loads(f.read())

def store_results (
        data: list,
        path: str
    ):
    """ Writes the given results as JSON to a file.
        An already existing file is overwritten.

    Args:
        data (list): The results to be stored
        path (String): Path for file to be stored
    """
    import json
    with open(path, "w+") as target:
        serialized = json.dumps(data)
        target.write(serialized)

############
# Plotting #
############

def plot (
        orig: numpy.ndarray,
        blur: numpy.ndarray,
        thres: numpy.ndarray,
        edges: numpy.ndarray,
        boxes: numpy.ndarray,
        center_boxes: numpy.ndarray,
        checked: numpy.ndarray,
        end: numpy.ndarray,
    ):
    """ Shows the 8 given images side by side in a 2 x 4 grid.

        Every image gets its own subplot, built the same way:

        - plt.subplot => selects the cell of the grid
        - plt.xticks, plt.yticks => hide axis labels
        - plt.title => Title to be shown
        - plt.imshow => shows given image in the subplot
            - the image is passed through opencv2matplotlib first,
              since opencv images are BGR and matplotlib expects RGB
    """
    import matplotlib.pyplot as plt
    from imutils import opencv2matplotlib

    # Title and image of each cell, in the order the cells are filled
    # (row by row, left to right)
    panels = (
        ("Original", orig),
        ("Blur", blur),
        ("Threshold", thres),
        ("Canny Edge", edges),
        ("Boxes", boxes),
        ("Center of Boxes", center_boxes),
        ("Areas to be checked", checked),
        ("Found corrected and checked boxes", end),
    )

    # 24x is the three digit subplot code: 2 rows, 4 columns, cell x
    grid = 240
    for cell, (title, image) in enumerate(panels, start=1):
        plt.subplot(grid + cell)
        plt.xticks([])
        plt.yticks([])
        plt.title(title)
        plt.imshow(opencv2matplotlib(image))

    plt.show()