PySpark:防止使用 pyspark 时日志中的终端 ANSI 转义字符被删除

2024-03-28 09:27:54 发布

您现在位置:Python中文网/ 问答频道 /正文

我为一个日志记录器(logger)编写了自定义的格式化程序(Formatter)。在 pyspark 下使用时,命令行上的所有颜色似乎都被去掉了。我可以确认每条发出的日志记录中都包含转义序列,但它们在输出到终端时似乎被剥离了。

为什么会这样?

import datetime
import logging

import colorama
from pygments import highlight
from pygments.lexers import JsonLexer
from pygments.formatters import Terminal256Formatter

# Required for colored output
colorama.init()


class CustomFormatter(logging.Formatter):
    '''Colorize log output and replace the level name with a short prefix.

    Level prefixes:

    !!! - critical
     !  - error
     ?  - warn
        - info
     ·  - debug

    Any other level gets ``default_prefix``. Every line of the message is
    additionally run through pygments using the JSON lexer.
    '''
    default_prefix = '???'  # used with non-generic levels

    # Escape sequences placed in front of the level prefix. Levels without
    # an entry (INFO and custom levels) get no extra styling.
    color_mapping = {
        logging.CRITICAL: colorama.Fore.RED + colorama.Style.BRIGHT,
        logging.ERROR: colorama.Fore.RED + colorama.Style.BRIGHT,
        logging.WARNING: colorama.Fore.YELLOW + colorama.Style.BRIGHT,
        logging.DEBUG: colorama.Style.DIM,
    }

    prefix_mapping = {
        logging.CRITICAL: '!!!',
        logging.ERROR: ' ! ',
        logging.WARNING: ' ? ',
        logging.INFO: '   ',
        logging.DEBUG: ' · ',
    }

    def format(self, record):
        '''Return *record* rendered as ``<prefix> <time> [<name>] <message>``.

        The decorated text is handed to ``logging.Formatter.format`` through
        ``record.msg``; the original ``record.msg`` is restored afterwards so
        other handlers receive the record undecorated.
        '''
        # Capture relevant record data
        level = self.prefix_mapping.get(record.levelno) or self.default_prefix
        timestamp = datetime.datetime.fromtimestamp(
            record.created).strftime("%Y-%m-%d %H:%M:%S")

        # Setup colors
        color = self.color_mapping.get(record.levelno) or ''
        dim = colorama.Style.DIM
        reset = colorama.Fore.RESET + colorama.Style.RESET_ALL

        # record.msg may be any object (e.g. logger.info({'a': 1})), so coerce
        # it to text before using string methods on it.
        msg = str(record.msg).rstrip('\n')

        # Highlight line by line so every output line is self-contained.
        lexer = JsonLexer()
        formatter = Terminal256Formatter()
        try:
            msg = '\n'.join(
                highlight(line, lexer, formatter).rstrip('\n')
                for line in msg.split('\n')
            )
        except Exception:
            # Highlighting is purely cosmetic: fall back to the plain text
            # rather than losing the log line. KeyboardInterrupt and
            # SystemExit are deliberately not caught here.
            pass

        # NOTE(review): %-style args are still applied by the base class
        # after highlighting -- confirm placeholders survive the inserted
        # escape sequences.
        original_msg = record.msg
        record.msg = '{color}{level}{reset} {dim}{msecs} [{name}]{reset} {msg}'.format(
            color=color,
            level=level,
            reset=reset,
            dim=dim,
            msecs=timestamp,
            name=record.name,
            msg=msg,
        )
        try:
            # Dump
            return super(CustomFormatter, self).format(record)
        finally:
            # Undo the mutation so a second handler does not decorate twice.
            record.msg = original_msg

用法:

import logging
import sys

from CustomFormatter import CustomFormatter

def get_logger(name, level=None):
    '''Return the logger called *name* with a colorized stdout handler added.

    name:  logger name passed to ``logging.getLogger``.
    level: numeric logging level. Anything that is not an int (including the
           default ``None``) means ``logging.DEBUG``; ``0`` falls back to
           ``logging.INFO``.

    Note that every call attaches another handler to the logger.
    '''
    if not isinstance(level, int):
        level = logging.DEBUG
    threshold = level or logging.INFO

    handler = logging.StreamHandler(sys.stdout)
    handler.setLevel(threshold)
    handler.setFormatter(CustomFormatter())

    logger = logging.getLogger(name)
    # Without this the logger inherits the root level (WARNING by default)
    # and DEBUG/INFO records never reach the handler.
    logger.setLevel(threshold)
    logger.addHandler(handler)
    return logger

logger = get_logger('tester')
logger.error('Error here')

Tags: name, from, import, self, prefix, style, logging, msg
1条回答
网友
1楼 · 发布于 2024-03-28 09:27:54

我花了一些时间研究这个问题,发现在 pyspark 下加载时,终端转义序列有所不同。我的修复方法是用 pygments 对我生成的终端输出再处理一遍(参见函数 fix_for_spark)。

# -*- coding: utf-8 -*-
import datetime
import json
import logging
import os

import colorama
from pygments import highlight
from pygments.lexers import JsonLexer
from pygments.formatters import Terminal256Formatter

# Required for colored output
colorama.init()


class CustomFormatter(logging.Formatter):

    '''Colorize log output and replace the level name with a short prefix.

    Level prefixes:

    !!! - critical
     !  - error
     ?  - warn
        - info
     ·  - debug

    Any other level gets ``default_prefix``. Non-string messages are
    pretty-printed as JSON when possible, and every message line is
    highlighted with pygments and carries the full prefix.
    '''
    default_prefix = '???'  # used with non-generic levels

    # Shared pygments objects, created once instead of on every record.
    PYGMENTS_LEXER = JsonLexer()
    PYGMENTS_FORMATTER = Terminal256Formatter()

    # Escape sequences placed in front of the level prefix and logger name.
    # Levels without an entry (INFO and custom levels) get no extra styling.
    color_mapping = {
        logging.CRITICAL: colorama.Fore.RED + colorama.Style.BRIGHT,
        logging.ERROR: colorama.Fore.RED + colorama.Style.BRIGHT,
        logging.WARNING: colorama.Fore.YELLOW + colorama.Style.BRIGHT,
        logging.DEBUG: colorama.Style.DIM,
    }

    prefix_mapping = {

        logging.CRITICAL: '!!!',
        logging.ERROR: ' ! ',
        logging.WARNING: ' ? ',
        logging.INFO: '  ️ ',
        logging.DEBUG: ' · ',
    }

    def fix_for_spark(self, string):
        '''Re-run *string* through pygments, line by line, when under Spark.

        Only active when the ``SPARK_ENV_LOADED`` environment variable is set
        to a non-empty value; otherwise *string* is returned unchanged.
        '''
        if not os.environ.get('SPARK_ENV_LOADED'):
            return string
        # Highlight each line on its own. Highlighting the whole string once
        # per line would repeat the full text for every line of input.
        return '\n'.join(
            highlight(line, self.PYGMENTS_LEXER, self.PYGMENTS_FORMATTER).rstrip('\n')
            for line in string.split('\n')
        )

    def format(self, record):
        '''Return *record* with ``<prefix> <time> [<name>]`` before each line.

        The decorated text is handed to ``logging.Formatter.format`` through
        ``record.msg``; the original ``record.msg`` is restored afterwards so
        other handlers receive the record undecorated.
        '''
        # Capture relevant record data
        data = dict(
            level=self.prefix_mapping.get(record.levelno) or self.default_prefix,
            msecs=datetime.datetime.fromtimestamp(record.created).strftime("%Y-%m-%d %H:%M:%S"),

            # Setup colors
            color=self.color_mapping.get(record.levelno) or '',
            dim=colorama.Style.DIM,
            reset=colorama.Fore.RESET + colorama.Style.RESET_ALL,
            name=record.name,
        )

        # Format prefix
        prefix = '{color}{level}{reset} {dim}{msecs}{reset} {color}[{name}]{reset}'
        prefix = self.fix_for_spark(prefix.format(**data))

        # Format msg
        msg = record.msg
        if not isinstance(msg, str):
            try:
                msg = json.dumps(msg, indent=4, sort_keys=True)
            except Exception:
                # Not JSON-serializable (or otherwise failed): fall back to
                # the plain string form instead of dropping the record.
                msg = str(msg)

        highlighted = '\n'.join(
            self.fix_for_spark(
                highlight(line, self.PYGMENTS_LEXER, self.PYGMENTS_FORMATTER).rstrip('\n')
            )
            for line in msg.split('\n')
        )

        # Plain concatenation instead of str.format: the prefix may contain
        # literal braces (e.g. from the logger name), which would break a
        # second round of formatting.
        original_msg = record.msg
        record.msg = '\n'.join(
            prefix + ' ' + line for line in highlighted.split('\n')
        )
        try:
            # Dump
            return super(CustomFormatter, self).format(record)
        finally:
            # Undo the mutation so a second handler does not decorate twice.
            record.msg = original_msg

相关问题 更多 >