ibstat は、コンピュータに接続されている InfiniBand アダプタの詳細情報を表示するコマンドです。マルチノード環境において、すべてのマシンの ibstat 情報を CSV 形式で表示するスクリプトを共有します。
import csv
import subprocess
import sys

# SSH login name used for every server in ``servers``.
username = "YOUR_USERNAME"

# Hosts to query; replace the placeholders with real host names.
servers = [
    "xxxxx",
    "yyyyy",
    "zzzzz",
    "your_server_names",
]

# Column order of the CSV report written by ``main``.
CSV_HEADER = ["server", "device", "State", "Base_lid", "SM_lid", "Port_GUID"]

# Per-port fields of the report (everything after "server" and "device").
PORT_FIELDS = CSV_HEADER[2:]


def exec_command(commands):
    """Run a command and return its standard output as a list of lines.

    Args:
        commands: Sequence of strings forming the argument vector; it is
            executed directly, without a local shell.

    Returns:
        The non-blank lines of the command's standard output, with leading
        whitespace preserved. An empty list is returned when the command
        prints nothing.

    A non-zero exit status is reported on standard error, so the CSV written
    to standard output stays clean, and the captured output is still returned.
    """
    process = subprocess.run(
        commands,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    if process.returncode != 0:
        command_text = " ".join(commands)
        error_text = process.stderr.decode(errors="replace").strip()
        print(
            f"warning: '{command_text}' exited with status "
            f"{process.returncode}: {error_text}",
            file=sys.stderr,
        )
    output = process.stdout.decode(errors="replace")
    # Drop blank lines so an empty or failed run never yields a phantom "" entry.
    return [line for line in output.splitlines() if line.strip()]


def get_devices(server):
    """Return the device names printed by ``ibstat -l`` on ``server``."""
    output = exec_command(("ssh", f"{username}@{server}", "ibstat", "-l"))
    return [line.strip() for line in output]


def parse_ibstat_output(lines, server, device):
    """Convert the lines of ``ibstat <device>`` output into a nested dict.

    Lines indented with one tab become top-level entries, a ``"\\tPort N:"``
    line opens a new port section, and lines indented with two tabs become
    entries of the most recently opened port. Spaces in keys are replaced
    by underscores. Lines without a leading tab or without a colon are
    ignored.

    Args:
        lines: Iterable of output lines with their leading tabs intact.
        server: Server name stored under the ``"server"`` key.
        device: Device name stored under the ``"device"`` key.

    Returns:
        A dict with ``"server"``, ``"device"``, the device-level fields and,
        when at least one port section was seen, a ``"port"`` dict mapping
        each port number (as a string) to a dict of its fields.

    Raises:
        ValueError: If a port-level line appears before any ``Port N:`` line.
    """
    data = {"server": server, "device": device}
    current_port = None
    for line in lines:
        if line.startswith("\tPort"):
            words = line.strip().split(":")[0].split()
            if len(words) >= 2:
                current_port = words[1]
                data.setdefault("port", {})[current_port] = {}
            continue
        if not line.startswith("\t"):
            # Unindented lines carry no key/value pair for this report.
            continue
        # partition() keeps any colon inside the value instead of failing.
        key, separator, value = line.partition(":")
        if not separator:
            continue
        key = key.strip().replace(" ", "_")
        value = value.strip()
        if line.startswith("\t\t"):
            if current_port is None:
                raise ValueError(
                    f"port attribute found before any port header: {line!r}"
                )
            data["port"][current_port][key] = value
        else:
            data[key] = value
    return data


def get_device_data(server, device):
    """Run ``ibstat <device>`` on ``server`` over ssh and parse the result."""
    output = exec_command(("ssh", f"{username}@{server}", "ibstat", f"{device}"))
    return parse_ibstat_output(output, server, device)


def get_server_data(server):
    """Return one parsed dict per device reported by ``server``."""
    return [get_device_data(server, device) for device in get_devices(server)]


def write_data(row, writer):
    """Write one CSV row describing port 1 of a parsed device.

    Args:
        row: Dict as produced by ``parse_ibstat_output``.
        writer: Object with a ``writerow`` method, e.g. ``csv.writer``.

    Only the port numbered ``"1"`` is reported. When that port or one of its
    fields is missing, the corresponding cells are left empty rather than
    aborting the whole report.
    """
    port_details = row.get("port", {}).get("1", {})
    writer.writerow(
        [row["server"], row["device"]]
        + [port_details.get(field, "") for field in PORT_FIELDS]
    )


def main():
    """Print a CSV report for every device of every configured server."""
    writer = csv.writer(sys.stdout)
    writer.writerow(CSV_HEADER)
    for server in servers:
        for device_data in get_server_data(server):
            write_data(device_data, writer)


if __name__ == "__main__":
    main()