# Source code for insights.parsers.ps

"""
Ps - command ``ps auxww`` and others
====================================

This module provides processing for the various outputs of the ``ps`` command.
"""
from insights.core import CommandParser, ContainerParser
from insights.core.exceptions import ParseException
from insights.core.filters import add_filter
from insights.core.plugins import parser
from insights.parsers import keyword_search, parse_delimited_table
from insights.specs import Specs
from insights.util import deprecated


def are_present(tags, line):
    """bool: Returns True if every tag in ``tags`` occurs in ``line``.

    An empty ``tags`` iterable yields True.
    """
    for tag in tags:
        if tag not in line:
            # One missing tag is enough to reject the line.
            return False
    return True
class Ps(CommandParser):
    """
    Template Class to parse ``ps`` command output.

    Raises:
        ParseException: Raised if the heading line (containing both
            ``user_name`` and ``command_name``) is not found in the input.

    Attributes:
        data (list): List of dicts, where the keys in each dict are the
            column headers and each item in the list represents a process.
        running (set): Set of full command strings for each command
            including optional path and arguments.  Being a set, it is
            unordered and holds each distinct command string only once.
        cmd_names (set): Set of just the command names, minus any path or
            arguments.
        services (list): List of tuples in format (cmd names, user/uid/pid,
            raw_line) for each command.
        pid_info (dict): Dictionary indexed by ``pid`` returning dict of
            process info.
    """

    command_name = "COMMAND_TEMPLATE"
    '''
    ``command_name`` is the name of the subclass specific command column
    from the header of ps output, the subclass must override it
    correspondingly
    '''

    user_name = "USER_TEMPLATE"
    '''
    ``user_name`` is the name of the subclass specific user_name column
    from the header of ps output, the subclass must override it
    correspondingly
    '''

    max_splits = 0
    '''
    ``max_splits`` is the split number for the columns from the ps output,
    the subclass must override it correspondingly
    '''

    def __init__(self, *args, **kwargs):
        # The containers are created before calling the parent constructor
        # so that they already exist if it triggers ``parse_content``
        # (presumably it does -- confirm against insights.core.Parser).
        self.data = []
        self.running = set()
        self.cmd_names = set()
        self.services = []
        self.pid_info = {}
        super(Ps, self).__init__(*args, **kwargs)

    def parse_content(self, content):
        """
        Parse the ``ps`` output and populate ``data``, ``running``,
        ``cmd_names``, ``services`` and ``pid_info``.

        Args:
            content (list): Lines of the ``ps`` command output.

        Raises:
            ParseException: If no line containing both ``user_name`` and
                ``command_name`` (the header line) is found.
        """
        raw_line_key = "_line"
        header_line = next(
            (line for line in content
             if are_present(tags=[self.user_name, self.command_name], line=line)),
            None
        )
        if header_line is None:
            raise ParseException(
                "{0}: Cannot find ps header line containing {1} and {2} in output".format(
                    self.__class__.__name__, self.user_name, self.command_name)
            )

        # parse_delimited_table allows short lines, but we specifically
        # want to ignore them.
        self.data = [
            row for row in parse_delimited_table(
                content, heading_ignore=[header_line],
                max_splits=self.max_splits, raw_line_key=raw_line_key
            )
            # skip the insights-client self grep process "grep -F .."
            if self.command_name in row and not row[self.command_name].startswith('grep -F ')
        ]

        # The above list comprehension assures all rows have a command.
        for proc in self.data:
            cmd = proc[self.command_name]
            self.running.add(cmd)
            cmd_name = cmd
            if cmd.startswith('/'):
                # Absolute path: keep only the basename of the executable.
                cmd_name = cmd.split(None, 1)[0].split("/")[-1]
            elif ' ' in cmd:
                cmd_name = cmd.split(None, 1)[0]
            proc["COMMAND_NAME"] = cmd_name
            self.cmd_names.add(cmd_name)
            proc["ARGS"] = cmd.split(" ", 1)[1] if " " in cmd else ""
            self.services.append((cmd_name, proc[self.user_name], proc[raw_line_key]))
            # The raw line is only needed for ``services``; drop it so the
            # rows contain just the table columns plus the derived keys.
            del proc[raw_line_key]

        # Rows whose PID is not numeric are treated as thread rows that
        # belong to the most recent row with a numeric PID.
        pid = None
        stat = None
        threads = 0
        for row in self.data:
            _pid = row['PID']
            if _pid.isdigit():
                # ``pid`` is still None when thread rows appear before any
                # process row; there is no entry to attach them to, so
                # they are ignored instead of raising ``KeyError``.
                if threads and pid is not None:
                    # Set the number of threads for the previous entry, and
                    # set the entry's stat to the stat of the last thread.
                    self.pid_info[pid].update({"STAT": stat, "threads": threads})
                pid = _pid
                # Note: this is the same dict object as the one in ``data``.
                self.pid_info[_pid] = row
                stat = None
                threads = 0
            else:
                stat = row['STAT']
                threads += 1

        # Check if there was a thread as the last row.
        if threads and pid is not None:
            self.pid_info[pid].update({"STAT": stat, "threads": threads})

    def __contains__(self, proc):
        # Membership is an exact match on the full command string.
        return proc in self.running

    def __iter__(self):
        for row in self.data:
            yield row

    def running_pids(self):
        """
        Gives the list of process IDs in the order listed.

        Returns:
            list: the PIDs from the PID column.
        """
        return [row["PID"] for row in self.data if "PID" in row]

    def users(self, proc):
        """
        Searches for all users running a given command.  If the user column
        is not present then returns an empty dict.

        Returns:
            dict: each username as a key to a list of PIDs (as strings) that
            are running the given process.  ``{}`` if neither ``USER`` nor
            ``UID`` is found or ``proc`` is not found.

        .. note::
            'proc' must match the entire command and arguments.
        """
        valid_user_columns = ['USER', 'UID']
        if self.user_name not in valid_user_columns:
            return {}

        ret = {}
        for row in self.data:
            if proc == row[self.command_name]:
                ret.setdefault(row[self.user_name], []).append(row["PID"])
        return ret

    def fuzzy_match(self, proc):
        """
        Are there any commands that contain the given text?

        Returns:
            boolean: ``True`` if the word ``proc`` appears in the command
            column.

        .. note::
            'proc' can match anywhere in the command path, name or arguments.
        """
        return any(proc in row[self.command_name] for row in self.data)

    def number_occurences(self, proc):
        """
        Returns the number of occurrences of commands that contain given text

        Returns:
            int: The number of occurrences of commands with given text

        .. note::
            'proc' can match anywhere in the command path, name or arguments.
        """
        return sum(1 for row in self.data if proc in row[self.command_name])

    def search(self, **kwargs):
        """
        Search the process list for matching rows based on key-value pairs.

        This uses the :py:func:`insights.parsers.keyword_search` function for
        searching; see its documentation for usage details.  If no search
        parameters are given, no rows are returned.

        Returns:
            list: A list of dictionaries of processes that match the given
            search criteria.

        Examples:
            >>> ps.search(COMMAND__contains='bash') == [
            ...    {'%MEM': '0.0', 'TTY': 'pts/3', 'VSZ': '108472', 'ARGS': '', 'PID': '20160', '%CPU': '0.0',
            ...     'START': '10:09', 'COMMAND': '/bin/bash', 'USER': 'user1', 'STAT': 'Ss', 'TIME': '0:00',
            ...     'COMMAND_NAME': 'bash', 'RSS': '1896'},
            ...    {'%MEM': '0.0', 'TTY': '?', 'VSZ': '9120', 'ARGS': '', 'PID': '20457', '%CPU': '0.0',
            ...     'START': '10:09', 'COMMAND': '/bin/bash', 'USER': 'root', 'STAT': 'Ss', 'TIME': '0:00',
            ...     'COMMAND_NAME': 'bash', 'RSS': '832'}
            ... ]
            True
            >>> ps.search(USER='root', COMMAND__contains='bash') == [
            ...    {'%MEM': '0.0', 'TTY': '?', 'VSZ': '9120', 'ARGS': '', 'PID': '20457', '%CPU': '0.0',
            ...     'START': '10:09', 'COMMAND': '/bin/bash', 'USER': 'root', 'STAT': 'Ss', 'TIME': '0:00',
            ...     'COMMAND_NAME': 'bash', 'RSS': '832'}
            ... ]
            True
            >>> ps.search(TTY='pts/3') == [
            ...    {'%MEM': '0.0', 'TTY': 'pts/3', 'VSZ': '108472', 'ARGS': '', 'PID': '20160', '%CPU': '0.0',
            ...     'START': '10:09', 'COMMAND': '/bin/bash', 'USER': 'user1', 'STAT': 'Ss', 'TIME': '0:00',
            ...     'COMMAND_NAME': 'bash', 'RSS': '1896'}
            ... ]
            True
            >>> ps.search(STAT__contains='Z') == [
            ...    {'%MEM': '0.0', 'TTY': '?', 'VSZ': '0', 'ARGS': '', 'PID': '1821', '%CPU': '0.0',
            ...     'START': 'May31', 'COMMAND': '[kondemand/0]', 'USER': 'root', 'STAT': 'Z', 'TIME': '0:29',
            ...     'COMMAND_NAME': '[kondemand/0]', 'RSS': '0'}
            ... ]
            True
        """
        return keyword_search(self.data, **kwargs)
# Register "COMMAND" as a filter string on the ps_auxww spec.  "COMMAND" is
# the header column PsAuxww looks for; presumably this keeps the header line
# in filtered collections -- confirm against insights.core.filters.
add_filter(Specs.ps_auxww, "COMMAND")
@parser(Specs.ps_auxww)
class PsAuxww(Ps):
    """
    Class ``PsAuxww`` parses the output of the ``ps auxww`` command.  A small
    sample of the output of this command looks like::

        USER       PID %CPU %MEM    VSZ   RSS TTY      STAT START   TIME COMMAND
        root         1  0.0  0.0  19356  1544 ?        Ss   May31   0:01 /usr/lib/systemd/systemd --switched-root --system --deserialize 22
        root      1661  0.0  0.0 126252  1392 ?        Ss   May31   0:04 /usr/sbin/crond -n
        root      1691  0.0  0.0  42688   172 ?        Ss   May31   0:00 /usr/sbin/rpc.mountd
        root      1821  0.0  0.0      0     0 ?        Z    May31   0:29 [kondemand/0]
        root      1864  0.0  0.0  18244   668 ?        Ss   May31   0:05 /usr/sbin/irqbalance --foreground
        user1    20160  0.0  0.0 108472  1896 pts/3    Ss   10:09   0:00 /bin/bash
        root     20357  0.0  0.0   9120   832 ?        Ss   10:09   0:00 /usr/sbin/dhclient enp0s25
        root     20457  0.0  0.0   9120   832 ?        Ss   10:09   0:00 /bin/bash

    PsAuxww attempts to read the output of ``ps auxwww``, ``ps aux``, and
    ``ps auxcww`` commands from archives.

    Examples:
        >>> type(ps_auxww)
        <class 'insights.parsers.ps.PsAuxww'>
        >>> ps_auxww.running == set([
        ...     '/bin/bash', '/usr/sbin/rpc.mountd', '/usr/lib/systemd/systemd --switched-root --system --deserialize 22',
        ...     '/usr/sbin/irqbalance --foreground', '/usr/sbin/dhclient enp0s25', '[kondemand/0]', '/usr/sbin/crond -n'
        ... ])
        True
        >>> ps_auxww.cpu_usage('[kondemand/0]')
        '0.0'
        >>> ps_auxww.users('/bin/bash') == {'root': ['20457'], 'user1': ['20160']}
        True
        >>> ps_auxww.fuzzy_match('dhclient')
        True
        >>> sum(int(p['VSZ']) for p in ps_auxww)
        333252
    """
    command_name = "COMMAND"
    user_name = "USER"
    # Eleven columns in the header, so ten splits keep COMMAND (with its
    # arguments) in one piece.
    max_splits = 10

    def cpu_usage(self, proc):
        """
        Look up the CPU usage of the first process whose full command line
        equals ``proc``.

        Returns:
            str: the ``%CPU`` value of the first matching row, or ``None``
            when no row matches.

        .. note::
            'proc' must match the entire command and arguments.
        """
        matching_usage = (
            entry["%CPU"] for entry in self.data
            if proc == entry[self.command_name]
        )
        return next(matching_usage, None)
# Register "CMD" as a filter string on the ps_ef spec.  "CMD" is the header
# column PsEf looks for; presumably this keeps the header line in filtered
# collections -- confirm against insights.core.filters.
add_filter(Specs.ps_ef, "CMD")
@parser(Specs.ps_ef)
class PsEf(Ps):
    """
    Class ``PsEf`` parses the output of the ``ps -ef`` command.  A small
    sample of the output of this command looks like::

        UID         PID   PPID  C STIME TTY          TIME CMD
        root          1      0  0 03:53 ?        00:00:06 /usr/lib/systemd/systemd --system --deserialize 15
        root          2      0  0 03:53 ?        00:00:00 [kthreadd]
        root       1803      1  5 03:54 ?        00:55:22 /usr/bin/openshift start master --config=/etc/origin/master/master-config.yaml --loglevel
        root       1969      1  3 03:54 ?        00:33:51 /usr/bin/openshift start node --config=/etc/origin/node/node-config.yaml --loglevel=2
        root       1995      1  0 03:54 ?        00:02:06 /usr/libexec/docker/rhel-push-plugin
        root       2078   1969  0 03:54 ?        00:00:00 journalctl -k -f
        root       7201      1  0 03:59 ?        00:00:00 /usr/bin/python /usr/libexec/rhsmd
        root     111434      1  0 22:32 ?        00:00:00 nginx: master process /usr/sbin/nginx -c /etc/nginx/nginx.conf
        nginx    111435 111434  0 22:32 ?        00:00:00 nginx: worker process

    PsEf attempts to read the output of ``ps -ef`` commands from archives.

    Examples:
        >>> type(ps_ef)
        <class 'insights.parsers.ps.PsEf'>
        >>> ps_ef.parent_pid("111435")
        ['111434', 'nginx: master process /usr/sbin/nginx -c /etc/nginx/nginx.conf']
        >>> ps_ef.users('nginx: worker process')
        {'nginx': ['111435']}
        >>> ps_ef.fuzzy_match('kthreadd')
        True
    """
    command_name = "CMD"
    user_name = "UID"
    # Eight columns in the header, so seven splits keep CMD (with its
    # arguments) in one piece.
    max_splits = 7

    def parent_pid(self, pid):
        """
        Find the parent of the process whose PID equals ``pid``.

        Returns:
            list: ``[parent pid, parent command]`` where the command is the
            parent's full ``CMD`` column.  ``None`` if ``pid`` is not found
            or no listed process has the parent's PID.
        """
        rows = self.data
        for child in rows:
            if pid != child["PID"]:
                continue
            ppid = child["PPID"]
            # First listed row whose PID is this child's PPID, if any.
            parent = next((cand for cand in rows if cand["PID"] == ppid), None)
            if parent is not None:
                return [ppid, parent[self.command_name]]
        return None
@parser(Specs.ps_auxcww)
class PsAuxcww(PsAuxww):
    """
    Parser registered for the ``ps_auxcww`` spec.  It adds nothing of its
    own: all parsing, attributes and methods are inherited from
    :class:`PsAuxww`.
    """
# Register "COMMAND" as a filter string on the ps_aux spec.  "COMMAND" is
# the header column PsAux inherits from PsAuxww; presumably this keeps the
# header line in filtered collections -- confirm against
# insights.core.filters.
add_filter(Specs.ps_aux, "COMMAND")
@parser(Specs.ps_aux)
class PsAux(PsAuxww):
    """
    Parser registered for the ``ps_aux`` spec.  It adds nothing of its own:
    all parsing, attributes and methods are inherited from :class:`PsAuxww`.
    """
# Register "COMMAND" as a filter string on the container_ps_aux spec.
# "COMMAND" is the header column ContainerPsAux inherits from PsAuxww;
# presumably this keeps the header line in filtered collections -- confirm
# against insights.core.filters.
add_filter(Specs.container_ps_aux, "COMMAND")
@parser(Specs.container_ps_aux)
class ContainerPsAux(ContainerParser, PsAuxww):
    """
    Class to parse the command `ps aux` from the containers.

    The body is empty: behaviour comes from the two base classes,
    ``ContainerParser`` and :class:`PsAuxww`.

    Sample input data::

        USER       PID %CPU %MEM     VSZ    RSS TTY      STAT START   TIME COMMAND
        root         1  0.0  0.0   19356   1544 ?        Ss   May31   0:01 /sbin/init
        root      1821  0.0  0.0       0      0 ?        S    May31   0:25 [kondemand/0]
        root      1864  0.0  0.0   18244    668 ?        Ss   May31   0:05 irqbalance --pid=/var/run/irqbalance.pid
        user1    20160  0.0  0.0  108472   1896 pts/3    Ss   10:09   0:00 bash
        root     20357  0.0  0.0    9120    760 ?        Ss   10:09   0:00 /sbin/dhclient -1 -q -lf /var/lib/dhclient/dhclient-extbr0.leases -pf /var/run/dhclient-extbr0.pid extbr0
        qemu     22673  0.8 10.2 1618556 805636 ?        Sl   11:38   1:07 /usr/libexec/qemu-kvm -name rhel7 -S -M rhel6.5.0 -enable-kvm -m 1024 -smp 2,sockets=2,cores=1,threads=1 -uuid 13798ffc-bc1e-d437-4f3f-2e0fa6c923ad
        tomcat    3662  1.0  5.7 2311488  58236 ?        Ssl  07:28   0:01 /usr/lib/jvm/jre/bin/java -classpath /usr/share/tomcat/bin/bootstrap.jar:/usr/share/tomcat/bin/tomcat-juli.jar:/usr/share/java/commons-daemon.jar -Dcatalina.base=/usr/share/tomcat -Dcatalina.home=/usr/share/tomcat -Djava.endorsed.dirs= -Djava.io.tmpdir=/var/cache/tomcat/temp -Djava.util.logging.config.file=/usr/share/tomcat/conf/logging.properties -Djava.util.logging.manager=org.apache.juli.ClassLoaderLogManager org.apache.catalina.startup.Bootstrap start

    Examples:
        >>> type(container_ps_aux)
        <class 'insights.parsers.ps.ContainerPsAux'>
        >>> container_ps_aux.container_id
        '2869b4e2541c'
        >>> container_ps_aux.image
        'registry.access.redhat.com/ubi8/nginx-120'
        >>> container_ps_aux.number_occurences("bash")
        1
    """
@parser(Specs.ps_eo)
class PsEo(Ps):
    """
    .. warning::
        This class is deprecated and will be removed from 3.6.0.
        Please use the :class:`insights.parsers.ps.PsEoCmd` instead.

    Class to parse the command `ps -eo pid,ppid,comm,nlwp`

    Sample input data::

          PID  PPID COMMAND         NLWP
            1     0 systemd            1
            2     0 kthreadd           1
            3     2 ksoftirqd/0        1
         2416     1 auditd             1
         2419  2416 audispd            1
         2421  2419 sedispatch         1
         2892     1 NetworkManager     1
         3172  2892 dhclient           1
         3871     1 master             1
         3886  3871 qmgr               1
        13724  3871 pickup             1
        15663     2 kworker/0:1        1
        16998     2 kworker/0:3        1
        17259     2 kworker/0:0        1
        18294  3357 sshd               1

    Examples:
        >>> type(ps_eo)
        <class 'insights.parsers.ps.PsEo'>
        >>> ps_eo.pid_info['1'] == {'PID': '1', 'PPID': '0', 'COMMAND': 'systemd', 'COMMAND_NAME': 'systemd', 'ARGS': '', 'NLWP': '1'}
        True
        >>> ps_eo.children('2') == [
        ...     {'PID': '3', 'PPID': '2', 'COMMAND': 'ksoftirqd/0', 'COMMAND_NAME': 'ksoftirqd/0', 'ARGS': '', 'NLWP': '1'},
        ...     {'PID': '15663', 'PPID': '2', 'COMMAND': 'kworker/0:1', 'COMMAND_NAME': 'kworker/0:1', 'ARGS': '', 'NLWP': '1'},
        ...     {'PID': '16998', 'PPID': '2', 'COMMAND': 'kworker/0:3', 'COMMAND_NAME': 'kworker/0:3', 'ARGS': '', 'NLWP': '1'},
        ...     {'PID': '17259', 'PPID': '2', 'COMMAND': 'kworker/0:0', 'COMMAND_NAME': 'kworker/0:0', 'ARGS': '', 'NLWP': '1'}
        ... ]
        True
    """
    command_name = 'COMMAND'
    # This output has no user column; ``PID`` is the second tag used to
    # recognise the header line.  ``Ps.users`` returns ``{}`` for it.
    user_name = 'PID'
    max_splits = 3

    def __init__(self, *args, **kwargs):
        # Flag the deprecation first, then run the normal construction.
        deprecated(
            PsEo,
            "Please use the :class:`insights.parsers.ps.PsEoCmd` instead.",
            "3.6.0"
        )
        super(PsEo, self).__init__(*args, **kwargs)

    def children(self, ppid):
        """list: Returns the rows (dicts) whose ``PPID`` equals `ppid`, in listing order"""
        kids = []
        for entry in self.data:
            if entry['PPID'] == ppid:
                kids.append(entry)
        return kids
# Register "COMMAND" as a filter string on the ps_alxwww spec.  "COMMAND" is
# the header column PsAlxwww looks for; presumably this keeps the header
# line in filtered collections -- confirm against insights.core.filters.
add_filter(Specs.ps_alxwww, "COMMAND")
@parser(Specs.ps_alxwww)
class PsAlxwww(Ps):
    """
    Class to parse the command `ps alxwww`.

    See method and attribute details in the ``Ps`` parser.

    Sample input data::

        F   UID   PID  PPID PRI  NI    VSZ   RSS WCHAN  STAT TTY        TIME COMMAND
        4     0     1     0  20   0 128292  6928 ep_pol Ss   ?          0:02 /usr/lib/systemd/systemd --switched-root --system --deserialize 22
        1     0     2     0  20   0      0     0 kthrea S    ?          0:00 [kthreadd]
        1     0     3     2  20   0      0     0 smpboo S    ?          0:00 [ksoftirqd/0]
        5     0     4     2  20   0      0     0 worker S    ?          0:00 [kworker/0:0]
        1     0     5     2   0 -20      0     0 worker S<   ?          0:00 [kworker/0:0H]
        1     0     6     2  20   0      0     0 worker S    ?          0:00 [kworker/u4:0]
        1     0     7     2 -100  -      0     0 smpboo S    ?          0:00 [migration/0]
        1     0     8     2  20   0      0     0 rcu_gp S    ?          0:00 [rcu_bh]

    Examples:
        >>> type(ps_alxwww)
        <class 'insights.parsers.ps.PsAlxwww'>
        >>> 'systemd' in ps_alxwww.cmd_names
        True
        >>> '/usr/lib/systemd/systemd --switched-root --system --deserialize 22' in ps_alxwww.running
        True
        >>> ps_alxwww.search(COMMAND_NAME__contains='systemd') == [{
        ...     'F': '4', 'UID': '0', 'PID': '1', 'PPID': '0', 'PRI': '20', 'NI': '0', 'VSZ': '128292', 'RSS': '6928',
        ...     'WCHAN': 'ep_pol', 'STAT': 'Ss', 'TTY': '?', 'TIME': '0:02',
        ...     'COMMAND': '/usr/lib/systemd/systemd --switched-root --system --deserialize 22',
        ...     'COMMAND_NAME': 'systemd', 'ARGS': '--switched-root --system --deserialize 22'
        ... }]
        True
    """
    command_name = 'COMMAND'
    user_name = 'UID'
    # Thirteen columns in the header, so twelve splits keep COMMAND (with
    # its arguments) in one piece.
    max_splits = 12
@parser(Specs.ps_eo_cmd)
class PsEoCmd(Ps):
    """
    Class to parse the command `ps -ewwo pid,ppid,nlwp,args`
    where the datasource `ps_eo_cmd` trims off all args leaving
    only the full path to the command.

    Sample output from the ``ps -ewwo pid,ppid,nlwp,args`` command::

        PID  PPID NLWP COMMAND
          1     0    1 /usr/lib/systemd/systemd --switched-root --system --deserialize 31
          2     0    1 [kthreadd]
         11     2    1 /usr/bin/python3 /home/user1/python_app.py
         12     2    1 [kworker/u16:0-kcryptd/253:0]

    Sample data after trimming by the datasource::

        PID  PPID NLWP COMMAND
          1     0    1 /usr/lib/systemd/systemd
          2     0    1 [kthreadd]
         11     2    1 /usr/bin/python3
         12     2    1 [kworker/u16:0-kcryptd/253:0]

    Examples:
        >>> type(ps_eo_cmd)
        <class 'insights.parsers.ps.PsEoCmd'>
        >>> ps_eo_cmd.running_pids() == ['1', '2', '11', '12']
        True
        >>> ps_eo_cmd.search(COMMAND__contains='python3') == [
        ...     {'PID': '11', 'PPID': '2', 'NLWP': '1', 'COMMAND': '/usr/bin/python3',
        ...      'COMMAND_NAME': 'python3', 'ARGS': ''}
        ... ]
        True
    """
    command_name = 'COMMAND'
    # This output has no user column; ``PID`` is the second tag used to
    # recognise the header line.  ``Ps.users`` returns ``{}`` for it.
    user_name = 'PID'
    max_splits = 3

    def children(self, ppid):
        """list: Returns the rows (dicts) whose ``PPID`` equals `ppid`, in listing order"""
        kids = []
        for entry in self.data:
            if entry['PPID'] == ppid:
                kids.append(entry)
        return kids