Source code for insights.parsers.vdsm_log

"""
VDSMLog - file ``/var/log/vdsm/vdsm.log`` and ``/var/log/vdsm/import/import-*.log``
===================================================================================
"""

import re
from insights import LogFileOutput, parser
from datetime import datetime
from insights.specs import Specs


[docs] @parser(Specs.vdsm_log) class VDSMLog(LogFileOutput): """Logs from the Virtual Desktop and Server Manager. Uses ``LogFileOutput`` as the base class - see its documentation for more specific usage details. Sample logs from VDSM version 3:: Thread-60::DEBUG::2015-05-08 18:01:03,071::blockSD::600::Storage.Misc.excCmd::(getReadDelay) '/bin/dd if=/dev/5a30691d-4fae-4023-ae96-50704f6b253c/metadata iflag=direct of=/dev/null bs=4096 count=1' (cwd None) Thread-60::DEBUG::2015-05-08 18:01:03,090::blockSD::600::Storage.Misc.excCmd::(getReadDelay) SUCCESS: <err> = '1+0 records in\\n1+0 records out\\n4096 bytes (4.1 kB) copied, 0.00038933 s, 10.5 MB/s\\n'; <rc> = 0 Thread-65::DEBUG::2015-05-08 18:01:04,835::blockSD::600::Storage.Misc.excCmd::(getReadDelay) '/bin/dd if=/dev/e70cce65-0d02-4da4-8781-6aeeef5c86ff/metadata iflag=direct of=/dev/null bs=4096 count=1' (cwd None) Thread-65::DEBUG::2015-05-08 18:01:04,857::blockSD::600::Storage.Misc.excCmd::(getReadDelay) SUCCESS: <err> = '1+0 records in\\n1+0 records out\\n4096 bytes (4.1 kB) copied, 0.000157193 s, 26.1 MB/s\\n'; <rc> = 0 Thread-4662::DEBUG::2015-05-08 18:01:05,560::task::595::TaskManager.Task::(_updateState) Task=`9a7948f6-b6d9-42c2-b91f-7e0346dfc1d6`::moving from state init -> state preparing Example: >>> from insights.parsers.vdsm_log import VDSMLog >>> from insights.tests import context_wrap >>> vdsm_log = VDSMLog(context_wrap(VDSM_LOG)) >>> vdsm_log.get('TaskManager') 'Thread-4662::DEBUG::2015-05-08 18:01:05,560::task::595::TaskManager.Task::(_updateState) Task=`9a7948f6-b6d9-42c2-b91f-7e0346dfc1d6`::moving from state init -> state preparing' >>> list(vdsm_log.parse_lines(vdsm_log.get('TaskManager')))[0] # from generator to list to subscript {'thread': 'Thread-4662', 'level': 'DEBUG', 'asctime': datetime(2015, 5, 8, 18, 1, 5, 56000), 'module': 'task', 'line': '595', 'logname': 'TaskManager.Task', 'message': 'Task=`9a7948f6-b6d9-42c2-b91f-7e0346dfc1d6`::moving from state init -> state preparing' } Note: VDSM version 4 has different logs format than version 3. VDSM version 4 logs parser is designed to closely match log format as referred in ``/etc/vdsm/logger.conf`` which is as below:: format: %(asctime)s %(levelname)-5s (%(threadName)s) [%(name)s] %(message)s (%(module)s:%(lineno)d) Sample logs from VDSM version 4:: 2017-04-18 14:00:00,000+0200 INFO (jsonrpc/2) [jsonrpc.JsonRpcServer] RPC call Host.getStats succeeded in 0.02 seconds (__init__:515) 2017-04-18 14:00:01,807+0200 INFO (Reactor thread) [ProtocolDetector.AcceptorImpl] Accepted connection from ::ffff:10.34.60.219:49213 (protocoldetector:72) 2017-04-18 14:00:01,808+0200 ERROR (Reactor thread) [ProtocolDetector.SSLHandshakeDispatcher] Error during handshake: unexpected eof (m2cutils:304) 2017-04-18 14:00:03,304+0200 INFO (jsonrpc/6) [jsonrpc.JsonRpcServer] RPC call Host.getAllVmStats succeeded in 0.00 seconds (__init__:515) 2017-04-18 14:00:05,870+0200 INFO (jsonrpc/7) [dispatcher] Run and protect: getSpmStatus(spUUID=u'00000002-0002-0002-0002-00000000024f', options=None) (logUtils:51) Example: >>> from insights.parsers.vdsm_log import VDSMLog >>> from insights.tests import context_wrap >>> vdsm_log = VDSMLog(context_wrap(VDSM_LOG)) >>> lines_with_error = vdsm_log.get('ERROR') >>> list(vdsm_log.parse_lines(lines_with_error))[0] {'asctime': datetime(2017, 4, 18, 14, 0, 1, 808000), 'levelname': 'ERROR', 'thread_name': 'Reactor thread', 'name': 'ProtocolDetector.SSLHandshakeDispatcher', 'message': 'Error during handshake: unexpected eof', 'module': 'm2cutils', 'lineno': '304' } """
[docs] def parse_lines(self, lines): """Parse log lines to be used with keep_scan or get Parameters: lines(list): Lines to be parsed Yields: Dictionary with following keys * **asctime(datetime)** - date and time as datetime object * **level(str)** - log level. Can me INFO, ERROR, WARN or DEBUG * **thread(str)** - thread name * **logname(str)** - filter name * **message(str)** - the body of the message * **module(str)** - module name * **lineno(str)** - line number which triggered the log This will NOT parse Python Traceback. Any unparsed line(s) will be yield as a list """ time_format = '%Y-%m-%d %H:%M:%S,%f' # Parse data & time including milisecond at the begining of # line. Will ignore TZ hours difference. # Example: "2017-04-18 13:56:28,096+0200" # 2017 - 04 - 18 " " 13:56:28 , 096 +0200 # \d{4} - \d{2} - \d{2} \s+ [\d{2}:]+\d{2} , \d{3} (ignored) # noqa re_datetime_obj = re.compile(r"^(\d{4}-\d{2}-\d{2}\s+[\d{2}:]+\d{2},\d{3})") # noqa # Parse object name # Example: "[virt.vm]" # [ virt.vm ] # \[ (\w.+) \] re_name_obj = re.compile(r"\[(\w.+)\]") # In VDSM version 4 logs, three elements fall inside '()'. # Below pattern will catch anything that is in '() re_thread_and_module = re.compile(r"\((.*?)\)") # Capture anythin that falls between '[]' and '()'. '()' is # also at the end of line. # Example: "[virt.vm] The vm start process failed (vm:617)" # \[\w.+\] (.*?) \(\w+\:\w+\)$ re_message_obj = re.compile(r"\[\w.+\](.*?)\(\w+\:\w+\)$") # Capture INFO, ERROR, DEBUG and, WARN re_level_obj = re.compile(r"INFO|ERROR|DEBUG|WARN") for line in lines: # import pdb; pdb.set_trace() if isinstance(line, dict): line = line['raw_message'] if re_datetime_obj.match(line): # VDSM version 4 log parser fields = dict() thread_and_module = re_thread_and_module.findall(line) timestamp = re_datetime_obj.search(line).group() fields['level'] = re_level_obj.search(line).group() fields['thread'] = thread_and_module[0] fields['logname'] = re_name_obj.findall(line)[0] fields['message'] = re_message_obj.findall(line)[0].strip() fields['module'], fields['lineno'] = thread_and_module[-1].split(':') # noqa fields['asctime'] = datetime.strptime(timestamp, time_format) yield fields else: # VDSM version 3 log parser # # (psachin): I tried to keep the code unchanged as possible. # # If the line is too short, for some reason, then as many # fields as possible are pulled from the line. fieldnames = ('thread', 'level', 'timestamp', 'module', 'line', 'logname') fields = dict() parts = line.split('::', 6) fields.update(dict((k, v) for (k, v) in zip(fieldnames, filter(None, parts)))) if len(parts) == 7: func, msg = parts[6].split(' ', 1) fields['message'] = msg # Did we get a timestamp in there? if 'timestamp' in fields: # Try to convert the datetime if possible try: fields['asctime'] = datetime.strptime( fields['timestamp'], time_format) del fields['timestamp'] except: pass yield fields
[docs] @parser(Specs.vdsm_import_log) class VDSMImportLog(LogFileOutput): """Parser for the log file detailing virtual machine imports. Sample log file:: [ 0.2] preparing for copy [ 0.2] Copying disk 1/1 to /rhev/data-center/958ca292-9126/f524d2ba-155a/images/502f5598-335d-/d4b140c8-9cd5 [ 0.0] >>> source, dest, and storage-type have different lengths Example: >>> log = vdsm_import_logs.get('preparing for copy') >>> len(log) 1 >>> log[0].get('raw_message', None) '[ 0.2] preparing for copy' >>> vdsm_import_logs.vm_uuid # file: import-1f9efdf5-2584-4a2a-8f85-c3b6f5dac4e0-20180130T154807.log '1f9efdf5-2584-4a2a-8f85-c3b6f5dac4e0' >>> vdsm_import_logs.file_datetime datetime.datetime(2018, 1, 30, 15, 48, 07) Attributes: vm_uuid (str): UUID of imported VM file_datetime (datetime): Date and time that import began. """ _line_re = re.compile(r'^(?:\[\s+(?P<timestamp>\d+\.\d+)\]\s+)?(?P<message>.*)$')
[docs] def parse_content(self, content): """Parse ``import-@UUID-@datetime.log`` log file.""" super(VDSMImportLog, self).parse_content(content) splited_file_name = self.file_name.split('-') self.vm_uuid = '-'.join(splited_file_name[1:-1]) _datetime = splited_file_name[-1].replace('.log', '') try: self.file_datetime = datetime.strptime(_datetime, '%Y%m%dT%H%M%S') except: self.file_datetime = None
[docs] def get_after(self, timestamp, s=None): """Find all the (available) logs that are after the given time stamp. If `s` is not supplied, then all lines are used. Otherwise, only the lines contain the `s` are used. `s` can be either a single string or a strings list. For list, all keywords in the list must be found in the line. Parameters: timestamp(float): log lines after this time are returned. s(str or list): one or more strings to search for. If not supplied, all available lines are searched. Yields: Log lines with time stamps after the given time. Raises: TypeError: The ``timestamp`` should be in `float` type, otherwise a `TypeError` will be raised. """ if not isinstance(timestamp, float): raise TypeError( "get_after needs a float type timestamp, but get '{c}'".format( c=timestamp) ) including_lines = False search_by_expression = self._valid_search(s) for line in self.lines: # If `s` is not None, keywords must be found in the line if s and not search_by_expression(line): continue # Otherwise, search all lines match = self._line_re.search(line) if match and match.group('timestamp'): # Get logtimestamp and compare to given timestamp logstamp = float(match.group('timestamp')) if logstamp >= timestamp: including_lines = True yield self._parse_line(line) else: including_lines = False else: # If we're including lines, add this continuation line if including_lines: yield self._parse_line(line)