Skip to content

Commit 156a5f7

Browse files
thomassa authored and Bob Ball committed
CP-20679: populate PGPU.compatibility_metadata
For Nvidia graphics processors, fetch the compatibility metadata from the driver and store it in the PGPU.compatibility_metadata field (a string-to-string map) under the "nvidia" key. Signed-off-by: Thomas Sanders <[email protected]>
1 parent 7db4f0e commit 156a5f7

File tree

5 files changed

+42
-0
lines changed

5 files changed

+42
-0
lines changed

ocaml/xapi/jbuild

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ let () = Printf.ksprintf Jbuild_plugin.V1.send {|
9595
xcp.network
9696
xcp.v6
9797
xcp.memory
98+
xcp.gpumon
9899
xcp.updates
99100
rrdd-plugin
100101
xenopsd

ocaml/xapi/xapi_gpumon.ml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,3 +37,15 @@ module Gpumon = Daemon_manager.Make(struct
3737
end)
3838

3939
let with_gpumon_stopped = Gpumon.with_daemon_stopped
40+
41+
(* Helpers for querying gpumon about Nvidia physical GPUs. *)
module Nvidia = struct
42+
(* Key under which the Nvidia metadata is placed in the assoc-list
 * returned by get_pgpu_compatibility_metadata. *)
let key = "nvidia"
43+
44+
(* Return the single-element assoc-list [key, metadata] for the given pgpu.
 * The pgpu is resolved to its PCI object in the database, the pci_id field
 * of that PCI object is read, and that value is passed to the gpumon client
 * call Nvidia.get_pgpu_metadata, whose result is used as the metadata.
 * N.B. the pgpu must be in the local host where this function runs.
 * NOTE(review): the literal string passed as the first argument to the
 * client call is presumably a caller/debug tag - confirm against the
 * gpumon client interface. *)
45+
let get_pgpu_compatibility_metadata ~__context ~pgpu =
46+
let get = Gpumon_client.Client.Nvidia.get_pgpu_metadata in
47+
let pci = Db.PGPU.get_PCI ~__context ~self:pgpu in
48+
let address = Db.PCI.get_pci_id ~__context ~self:pci in
49+
[key, get "xapi_gpumon" address]
50+
51+
end (* Nvidia *)

ocaml/xapi/xapi_gpumon.mli

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,18 @@
1515
(** Stop gpumon if it's running, perform f, then start gpumon if
1616
* no other threads which require gpumon to be stopped are running. *)
1717
val with_gpumon_stopped : ?timeout:float -> (unit -> 'a) -> 'a
18+
19+
module Nvidia : sig
20+
(** The key under which the Nvidia entry appears in the metadata assoc-list. *)
21+
val key: string
22+
23+
(** Fetch metadata about the PGPU from the driver, and return the
24+
* single-element list [[(key, metadata)]], where [key] is the fixed value
25+
* declared in this signature and metadata is the opaque string of data from
 * the graphics driver (NOTE(review): typed here as a [string] wrapped in
 * [Gpumon_client.Client.RPCM.t] - confirm how callers unwrap it).
26+
* IMPORTANT: This must be called on the host that has the GPU installed in it. *)
27+
val get_pgpu_compatibility_metadata:
28+
__context:Context.t ->
29+
pgpu:[ `PGPU ] API.Ref.t ->
30+
(string * string Gpumon_client.Client.RPCM.t) list
31+
32+
end

ocaml/xapi/xapi_pci.mli

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@ val is_class_of_kind : base_class -> int -> bool
2626
(** Get int value for PCI {class, vendor, device}_id *)
2727
val int_of_id : string -> int
2828

29+
(** Get string value of int form of PCI {class, vendor, device}_id
30+
* (the reverse of int_of_id) *)
31+
val id_of_int : int -> string
32+
2933
(** Get an identifier for this PCI device **)
3034
val string_of_pci : __context:Context.t -> self:API.ref_PCI -> string
3135

ocaml/xapi/xapi_pgpu.ml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,14 @@ let calculate_max_capacities ~__context ~pCI ~size ~supported_VGPU_types =
2929
vgpu_type, max_capacity)
3030
supported_VGPU_types
3131

32+
(* Populate the compatibility_metadata field of the given pgpu.
 * The vendor_id of the PCI object behind the pgpu is compared with the string
 * form of the Nvidia vendor id; only on a match is the metadata fetched via
 * Xapi_gpumon.Nvidia.get_pgpu_compatibility_metadata and written to the
 * database. For any other vendor this function does nothing.
 * NOTE(review): nothing here catches exceptions, so a failure in the gpumon
 * call would propagate to the callers - confirm this is intended. *)
let set_compatibility_metadata ~__context ~pgpu =
33+
let pci = Db.PGPU.get_PCI ~__context ~self:pgpu in
34+
if Db.PCI.get_vendor_id ~__context ~self:pci = Xapi_pci.id_of_int Xapi_vgpu_type.Nvidia.vendor_id then (
35+
let () = Db.PGPU.set_compatibility_metadata ~__context ~self:pgpu
36+
~value:(Xapi_gpumon.Nvidia.get_pgpu_compatibility_metadata ~__context ~pgpu)
37+
in ()
38+
)
39+
3240
let create ~__context ~pCI ~gPU_group ~host ~other_config
3341
~supported_VGPU_types ~size ~dom0_access
3442
~is_system_display_device =
@@ -46,6 +54,7 @@ let create ~__context ~pCI ~gPU_group ~host ~other_config
4654
~self:pgpu ~value:supported_VGPU_types;
4755
Db.PGPU.set_enabled_VGPU_types ~__context
4856
~self:pgpu ~value:supported_VGPU_types;
57+
set_compatibility_metadata ~__context ~pgpu;
4958
debug "PGPU ref='%s' created (host = '%s')" (Ref.string_of pgpu) (Ref.string_of host);
5059
pgpu
5160

@@ -182,6 +191,7 @@ let update_gpus ~__context =
182191
Db.PGPU.set_is_system_display_device ~__context
183192
~self:rf
184193
~value:is_system_display_device;
194+
set_compatibility_metadata ~__context ~pgpu:rf;
185195
(rf, rc)
186196
with Not_found ->
187197
(* If a new PCI has appeared then we know this is a system boot.

0 commit comments

Comments
 (0)