Source code for mastml.utils

"""
The utils module contains a collection of miscellaneous methods and error handling used throughout MAST-ML
"""
import logging
import os
import random
import sys
import textwrap
import time
from collections import defaultdict
from math import ceil, floor, log, log10
from os.path import join

import numpy as np

class BetweenFilter:
    """
    Logging filter that only accepts records whose level lies inside a closed range

    Args:
        min_level: (int), lowest logging level that is let through
        max_level: (int), highest logging level that is let through

    Methods:
        filter: Decide whether a log record falls inside the configured level range
            Args:
                logRecord: (python logging.LogRecord object)
            Returns:
                (bool), True when min_level <= logRecord.levelno <= max_level, False otherwise
    """

    def __init__(self, min_level, max_level):
        self.min_level, self.max_level = min_level, max_level

    def filter(self, logRecord):
        level = logRecord.levelno
        return level >= self.min_level and level <= self.max_level
def activate_logging(savepath, paths, logger_name='mastml', to_screen=True, to_file=True, verbosity=0):
    """
    Method to create and configure the MAST-ML logger

    Args:
        savepath: (str), string specifying the save path; log.log and errors.log are created inside it
        paths: (list), list containing strings of path locations for config file, data file, and results folder
        logger_name: (str), name of the logger to configure
        to_screen: (bool), whether or not to write the log contents to the screen during a run
        to_file: (bool), whether or not to write the log contents to a file in the savepath
        verbosity: (int), controls the amount of output produced in the logger. Accepted values:
            0 shows everything
            -1 hides debug
            -2 hides info (so no stdout except print)
            -3 hides warning
            -4 hides error
            -5 hides all output

    Returns:
        None
    """
    time_formatter = logging.Formatter("[%(levelname)s] %(asctime)s : %(message)s")
    level_formatter = logging.Formatter("[%(levelname)s] %(message)s")

    rootLogger = logging.getLogger(logger_name)
    rootLogger.setLevel(logging.DEBUG)

    verbosalize_logger(rootLogger, verbosity)

    if to_file:
        # FileHandler owns the underlying file object, so it is closed when the
        # handler is closed (e.g. by logging.shutdown()) instead of leaking an
        # anonymous open() handle.

        # send everything to log.log
        log_hdlr = logging.FileHandler(join(savepath, 'log.log'), mode='a')
        log_hdlr.setLevel(logging.DEBUG)
        log_hdlr.setFormatter(time_formatter)
        rootLogger.addHandler(log_hdlr)

        # send WARNING and above to errors.log
        errors_hdlr = logging.FileHandler(join(savepath, 'errors.log'), mode='a')
        errors_hdlr.setLevel(logging.WARNING)
        errors_hdlr.setFormatter(time_formatter)
        rootLogger.addHandler(errors_hdlr)

    if to_screen:
        # send INFO and DEBUG (if not suppressed) to stdout
        if verbosity >= 0:
            lower_level = logging.DEBUG  # DEBUG and INFO
        elif verbosity == -1:
            lower_level = logging.INFO  # only INFO
        else:
            lower_level = logging.WARNING  # range WARNING..INFO is empty: effectively disables stdout

        stdout_hdlr = logging.StreamHandler(sys.stdout)
        stdout_hdlr.setLevel(lower_level)
        stdout_hdlr.addFilter(BetweenFilter(lower_level, logging.INFO))
        stdout_hdlr.setFormatter(level_formatter)
        rootLogger.addHandler(stdout_hdlr)

        # send WARNING and above to stderr:
        # verbosity >= -2 keeps WARNING, -3 raises the threshold to ERROR,
        # -4 to CRITICAL, and -5 to a level above CRITICAL (nothing is shown)
        lower_level = max(-10 * verbosity + 10, logging.WARNING)
        stderr_hdlr = logging.StreamHandler(sys.stderr)
        stderr_hdlr.setLevel(lower_level)
        stderr_hdlr.setFormatter(level_formatter)
        rootLogger.addHandler(stderr_hdlr)

    # the header is logged at INFO level, so it never reaches errors.log or stderr
    log_header(paths, rootLogger)
def log_header(paths, log):
    """
    Method to create header for MAST-ML logger

    Args:
        paths: (list), list containing strings of path locations for config file, data file, and results folder
            (in that order); only the base name of each path is written to the header
        log: (logging object), a python log; only its info() method is used

    Returns:
        None
    """
    # Raw strings: the ASCII art contains backslashes that are not valid escape
    # sequences, and it has no placeholders, so it must not be a plain f-string.
    # NOTE(review): column spacing of the art was reconstructed - confirm against the intended logo.
    logo = "\n".join((
        r"   __  ___     __________    __  _____",
        r"  /  |/  /__ _/ __/_  __/___/  |/  / /",
        r" / /|_/ / _ `/\ \  / / /___/ /|_/ / /__",
        r"/_/  /_/\_,_/___/ /_/     /_/  /_/____/",
        "",  # keeps the trailing newline after the last row of the logo
    ))

    # timestamp is taken from gmtime(), i.e. it is in UTC
    date_time = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime())

    header = (f"\n\n{logo}\n\nMAST-ML run on {date_time} using \n"
              f"conf file: {os.path.basename(paths[0])}\n"
              f"csv file: {os.path.basename(paths[1])}\n"
              f"saving to: {os.path.basename(paths[2])}\n\n")

    # logged at INFO level
    log.info(header)
## Custom errors for mastml:
class MastError(Exception):
    """
    Root of the MAST-ML exception hierarchy, used for errors that should be shown to the user
    """
class ConfError(MastError):
    """
    Error raised for a problem in the input configuration file
    """
class InvalidModel(MastError):
    """
    Error raised when a requested model does not exist
    """
class MissingColumnError(MastError):
    """
    Error raised when the csv data file does not contain the specified column
    """
class InvalidConfParameters(MastError):
    """
    Error raised when the input configuration file contains invalid parameters
    """
class InvalidConfSubSection(MastError):
    """
    Error raised when the input configuration file contains an invalid subsection name
    """
class InvalidConfSection(MastError):
    """
    Error raised when the input configuration file contains an invalid section name
    """
class FiletypeError(MastError):
    """
    Error raised when a file with an improper extension is used
    """
class FileNotFoundError(MastError):  # NOTE: re-uses (and therefore shadows) the builtin name in this module
    """
    Error raised when a file that is needed cannot be found

    Unlike the builtin of the same name, this class derives from MastError rather than OSError.
    """
class InvalidValue(MastError):
    """
    Error raised when an invalid value has been supplied
    """
## Math utilities that help plot_helper build axis ranges
# TODO: these can likely be removed from here, since they now live in plot_helper
def nice_range(lower, upper):
    """
    Method to build a list of nicely spaced values spanning the interval between two bounds

    Args:
        lower: (float or int), lower bound of range to create
        upper: (float or int), upper bound of range to create

    Returns:
        (list), list of numerical values in established range; whole-valued entries are returned as int, and
            the list is in descending order when upper < lower
    """
    # Case for validation where nan is passed in: fall back to default bounds
    if np.isnan(lower):
        lower = 0
    if np.isnan(upper):
        upper = 0.1

    # the helper expects ascending bounds, so swap them and undo the swap at the end
    descending = upper < lower
    if descending:
        lower, upper = upper, lower

    ticks = [_int_if_int(tick) for tick in _nice_range_helper(lower, upper)]
    if descending:
        ticks.reverse()
    return ticks
def _nice_range_helper(lower, upper):
    """
    Generator producing a nicely spaced set of axis ticks between two bounds

    Args:
        lower: (float), lower value of axis ticks
        upper: (float), upper value of axis ticks

    Yields:
        (float or int), tick values: multiples of the chosen step starting at lower rounded with _round_up,
            followed by upper (rounded to three significant figures) as the final value. When lower equals
            upper, the single value lower is yielded.
    """
    steps = 8  # number of intervals to aim for
    diff = abs(lower - upper)

    # special case where lower and upper are the same
    if diff == 0:
        # this function is a generator: the value has to be yielded, because a
        # `return [lower]` here would make the caller see no ticks at all
        yield lower
        return

    # rough estimate of the best step: exact step size moved to a power of ten
    step = _nearest_pow_ten(diff / steps)

    # tune the step with the factor that brings the interval count closest to `steps`
    factors = [0.1, 0.2, 0.5, 1, 2, 5, 10]

    def distance_from_target(steps_factor):
        steps_count, _ = steps_factor
        return abs(steps_count - steps)

    _, best_factor = min(((diff / (step * f), f) for f in factors), key=distance_from_target)
    step = step * best_factor

    # make the bounds look nice
    lower = _three_sigfigs(lower)
    upper = _three_sigfigs(upper)

    start = _round_up(lower, step)

    x = start
    i = 0
    # iterate until we get within one step of upper
    while x < upper - step:
        x = start + i * step
        yield _three_sigfigs(x)  # using sigfigs because of floating point error
        i += 1

    # finish off with ending bound
    yield upper


def _three_sigfigs(x):
    """
    Method invoking special case of _n_sigfigs to round to 3 significant figures

    Args:
        x: (float), an axis tick number

    Returns:
        (float), x rounded to 3 significant figures
    """
    return _n_sigfigs(x, 3)


def _n_sigfigs(x, n):
    """
    Method to round a number to a given number of significant figures

    Args:
        x: (float), an axis tick number
        n: (int), number of significant figures to keep

    Returns:
        (float or int), x rounded to n significant figures (0 when x is 0)
    """
    if x == 0:
        return 0

    sign = -1 if x < 0 else 1  # work on the magnitude and restore the sign at the end
    magnitude = abs(x)

    # floor(log10) is the position of the leading digit; using round() here
    # would drop one significant figure whenever the mantissa exceeds ~3.16
    decimals = (n - 1) - floor(log10(magnitude))
    return sign * round(magnitude, decimals)


def _nearest_pow_ten(x):
    """
    Method to return the smallest power of ten that is not below the magnitude of x

    Args:
        x: (float), an axis tick number

    Returns:
        (float or int), power of ten at or above abs(x), carrying the sign of x (0 when x is 0)
    """
    if x == 0:
        return 0

    sign = -1 if x < 0 else 1
    # log10 is exact for powers of ten, whereas log(x, 10) can land just above
    # the integer (e.g. for 0.001) and push the result up a whole decade
    return sign * 10 ** ceil(log10(abs(x)))


def _int_if_int(x):
    """
    Method to convert a whole-valued number to int

    Args:
        x: (float or int), a number

    Returns:
        (int or float), int(x) when x has no fractional part, otherwise x unchanged
    """
    as_int = int(x)
    return as_int if as_int == x else x


def _round_up(x, inc):
    """
    Method to round x to a multiple of inc, away from zero

    Args:
        x: (float or int), a number
        inc: (float), an increment for axis ticks

    Returns:
        (float), multiple of inc whose magnitude is at least that of x, with the sign of x
            (so negative values are moved further from zero)
    """
    sign = -1 if x < 0 else 1
    return sign * inc * ceil(abs(x) / inc)
def verbosalize_logger(log, verbosity):
    """
    Joke hook for positive verbosity values; does nothing for verbosity below 8

    Args:
        log: (logging object), a python log; only used when verbosity >= 8
        verbosity: (int), verbosity level

    Returns:
        None
    """
    # NOTE(review): for verbosity >= 8 this loop never terminates - it keeps
    # emitting CRITICAL records forever. Presumably an intentional joke; confirm.
    if verbosity >= 8:
        while True:
            log.critical('MASTML' * random.randint(3, 2 ** (verbosity - 4)))
    return None
#old_log = log._log #def new_log(level, msg, *args, **kwargs): # old_log(level, [None, None, to_upper, to_full_width, to_leet, deep_fry, deep_fry_2, emojify][verbosity](msg), *args, **kwargs) #log._log = new_log ## Joke functions: """ ### String formatting funcs for inserting into log._log when in verbose mode def to_upper(message): return str(message).upper() def to_full_width(message): message = str(message) ret = [] return ''.join(chr(ord(c)+0xFEE0) if c.isalnum() else c for c in message) def to_leet(message): conv = {'a': '4', 'b': '8', 'e': '3', 'l': '1', 'o': '0', 's': '5', 't': '7'} message = ''.join(conv[c] if c in conv else c for c in str(message)) return message.upper() def deep_fry_helper(s): for c in s: if random.random() < 0.2: yield chr(random.randint(1, 10000)) elif random.random() < 0.5: yield c.upper() else: yield c def deep_fry(message): return ''.join(deep_fry_helper(str(message))) def deep_fry_2_helper(s): for c in s: if random.random() < 0.01: x = chr(random.randint(1, 10000)) yield ''.join(x for _ in range(random.randint(1,5))) elif random.random() < 0.1: yield ''.join(c for _ in range(random.randint(1,9))) elif random.random() < 0.01: yield chr(ord(c)*10) elif random.random() < 0.2: yield c.upper() else: yield c def deep_fry_2(message): return ''.join(deep_fry_2_helper(str(message))) def emojify(message): message = str(message) words = {'score':0x1f600, 'splits': chr(0x21A9)+chr(0x21AA), 'split': chr(0x21A9)+chr(0x21AA), 'score':0x2728, 'number':0x0023, 'train':0x1f682, 'test':0x1f4dd, 'models':0x1F483, 'model':0x1F483, 'plot':0x1f4ca, 'image':0x1f5bc, 'file': 0x1f5c4, 'files':0x1f5c3, ' to':0x27a1, '1':0x261d, '2':0x270c} for word in words: if type(words[word]) is int: words[word] = chr(words[word]) words[word] += ' ' for word, emoji in words.items(): message = message.replace(' '+word+' ', ' '+emoji+' ') for word, emoji in words.items(): message = message.replace(word, emoji) return message.upper() def verbosalize_logger(log, verbosity): if 
verbosity <= 0: return if verbosity >= 8: while True: log.critical('MSATML'*random.randint(3,2**(verbosity-4))) old_log = log._log def new_log(level, msg, *args, **kwargs): old_log(level, [None, None, to_upper, to_full_width, to_leet, deep_fry, deep_fry_2, emojify][verbosity](msg), *args, **kwargs) log._log = new_log """