Source code for insights.parsers.nvidia

"""
NVIDIA Related Information
==========================

NvidiaSmiL - command ``/usr/bin/nvidia-smi -L``
-----------------------------------------------

NvidiaSmiActiveClocksEventReasons - command ``/usr/bin/nvidia-smi --query-gpu=name,clocks_event_reasons.active --format=csv,noheader``
--------------------------------------------------------------------------------------------------------------------------------------

NvidiaSmiQueryGPU - command ``/usr/bin/nvidia-smi --query-gpu=index,name,uuid,memory.total --format=csv,noheader``
------------------------------------------------------------------------------------------------------------------
"""

from collections import namedtuple
from insights.core import Parser
from insights.core.exceptions import ParseException, SkipComponent
from insights.core.plugins import parser
from insights.specs import Specs
from insights.util import deprecated


NvidiaGPUInfo = namedtuple(
    "NvidiaGPUInfo",
    ['index', 'model', 'uuid', 'memory_total'],
)
"""namedtuple: Represents the information parsed from ``nvidia-smi --query-gpu`` command output."""


# Refer to the following doc for the detailed bitmask of active clock event reasons:
# - https://docs.nvidia.com/deploy/nvml-api/group__nvmlClocksEventReasons.html
BITMASK = {
    "none": 0x0000000000000000,
    "gpu_idle": 0x0000000000000001,
    "applications_clocks_setting": 0x0000000000000002,
    "sw_power_cap": 0x0000000000000004,
    "hw_slowdown": 0x0000000000000008,
    "sync_boost": 0x0000000000000010,
    "sw_thermal_slowdown": 0x0000000000000020,
    "hw_thermal_slowdown": 0x0000000000000040,
    "hw_power_brake_slowdown": 0x0000000000000080,
    "display_clock_setting": 0x0000000000000100,
}


[docs] @parser(Specs.nvidia_smi_l) class NvidiaSmiL(Parser, list): """ .. warning:: This class is deprecated and will be removed from 3.8.0. Please use the :class:`insights.parsers.nvidia.NvidiaSmiQueryGPU` instead. Prase for output of command `/usr/bin/nvidia-smi -L`. This command lists each of the NVIDIA GPUs in the system, along with their UUIDs. The GPU info shown in each line will be parsed as follows:: model (string): The gpu model uuid (string): The gpu uuid Raises: ParseException: When run into unparsable gpu line SkipComponent: When content is empty or no parsable content Sample Content:: GPU 0: NVIDIA A100-PCIE-40GB (UUID: GPU-63110aaa-3561-c8f5-e125-4ab40bbcf838) GPU 1: NVIDIA A100-PCIE-40GB (UUID: GPU-c9bd25dc-c0c4-3ab6-8f7f-3ad16d6bde4a) Examples:: >>> gpus.gpu_count 2 >>> "NVIDIA A100-PCIE-40GB" in gpus.gpu_models True >>> gpus[0] {'model': 'NVIDIA A100-PCIE-40GB', 'uuid': 'GPU-63110aaa-3561-c8f5-e125-4ab40bbcf838'} """ def __init__(self, context): deprecated( NvidiaSmiL, "Please use the :class:`insights.parsers.nvidia.NvidiaSmiQueryGPU` instead.", "3.8.0", ) super(NvidiaSmiL, self).__init__(context)
[docs] def parse_content(self, content): if not content: raise SkipComponent("Empty content") _gpu_index_validator = 0 for line in content: if line.startswith("GPU ") and "(UUID: " in line and line.endswith(')'): spl_line = line.split("(UUID: ") spl_left = spl_line[0].split(': ') if not len(spl_left) == 2: raise ParseException("Unparsable GPU model: %s" % line) gpu_id = spl_left[0].split()[-1] if not (gpu_id.isdigit() and int(gpu_id) == _gpu_index_validator): raise ParseException("Unparsable GPU id: %s" % line) _gpu_index_validator += 1 gpu_model = spl_left[-1].strip() gpu_uuid = spl_line[-1].strip(') ') if not (gpu_model and gpu_uuid): raise ParseException("Unparsable GPU line: %s" % line) self.append( { "model": gpu_model, "uuid": gpu_uuid, } ) if len(self) < 1: raise ParseException("Empty GPU info after parse: %s" % content)
@property def gpu_count(self): """ str: Returns the GPU count """ return len(self) @property def gpu_models(self): """ str: Returns the GPUs model set """ return set([gpu["model"] for gpu in self])
[docs] @parser(Specs.nvidia_smi_active_clocks_event_reasons) class NvidiaSmiActiveClocksEventReasons(Parser, list): """ Parser for the output of command `/usr/bin/nvidia-smi --query-gpu=name,clocks_event_reasons.active --format=csv,noheader`. This command lists each of the NVIDIA GPUs in the system, along with their names and bitmasks of active clock event reasons. Raises: ParseException: When run into an unparsable line SkipComponent: When content is empty Sample Content:: NVIDIA L4, 0x0000000000000001 NVIDIA A1, 0x0000000000000000 NVIDIA H1, 0x0000000000000084 Examples:: >>> type(active_clocks_event_reasons) <class 'insights.parsers.nvidia.NvidiaSmiActiveClocksEventReasons'> >>> len(active_clocks_event_reasons) 3 >>> active_clocks_event_reasons[0]['applications_clocks_setting'] False >>> active_clocks_event_reasons[2]['hw_power_brake_slowdown'] True >>> active_clocks_event_reasons[2]['none'] False """
[docs] def parse_content(self, content): if not content: raise SkipComponent("Empty content.") for line in content: items = line.split(",") if len(items) != 2 or not items[1].strip().startswith("0x"): raise ParseException( "Not an expected command output for active clocks event reasons: %s" % line ) bitmask = int(items[1].strip().strip("LL"), 16) data = dict(gpu_name=items[0].strip()) for key, bm in BITMASK.items(): if key == "none": data[key] = bm | bitmask == bm else: data[key] = bm & bitmask == bm self.append(data)
[docs] @parser(Specs.nvidia_smi_query_gpu) class NvidiaSmiQueryGPU(Parser, list): """ Parser for the output of command `/usr/bin/nvidia-smi --query-gpu=index,name,uuid,memory.total --format=csv,noheader`. This command lists each of the NVIDIA GPUs info in the system. The GPU info shown in each line will be parsed to a namedtuple ``NvidiaGPUInfo`` contains the follows info:: index (str): The gpu index model (str): The gpu model uuid (str): The gpu uuid memory_total(str): The gpu total memory Raises: ParseException: When run into an unparsable line SkipComponent: When content is empty Sample Content:: 0, NVIDIA L4, GPU-24598a07-f97d-fc79-86de-485c4c82d01c, 23034 MiB 1, NVIDIA A1, GPU-63110aaa-3561-c8f5-e125-4ab40bbcf838, 16280 MiB 2, NVIDIA H1, GPU-c9bd25dc-c0c4-3ab6-8f7f-3ad16d6bde4a, 24564 MiB Examples:: >>> gpus_info.gpu_count 3 >>> "NVIDIA L4" in gpus_info.gpu_models True >>> type(gpus_info[0]) <class 'insights.parsers.nvidia.NvidiaGPUInfo'> >>> gpus_info[0].uuid 'GPU-24598a07-f97d-fc79-86de-485c4c82d01c' >>> gpus_info[0].memory_total '23034 MiB' """ _columns = NvidiaGPUInfo._fields
[docs] def parse_content(self, content): for line in content: items = [v.strip() for v in line.strip().split(",")] if len(items) == len(self._columns) and all(items): self.append(NvidiaGPUInfo(*items)) else: raise ParseException("Not an expected command output: %s" % line) if len(self) == 0: raise SkipComponent
@property def gpu_count(self): """ str: Returns the GPU count """ return len(self) @property def gpu_models(self): """ str: Returns the GPUs model set """ return set([gpu.model for gpu in self])