The NPU device needs to be set explicitly in every newly started process (done here at the start of `generate_stream_gate`)

lm-sys · merrymercy · Dec 28, 2023 · Dec 21, 2023 · Dec 21, 2023 · Dec 21, 2023
commit ed8069edade958e68e02ce9aece06bef6567d5a4
diff --git a/fastchat/serve/model_worker.py b/fastchat/serve/model_worker.py
@@ -31,7 +31,6 @@
     str_to_torch_dtype,
 )
 
-
 worker_id = str(uuid.uuid4())[:8]
 logger = build_logger("model_worker", f"model_worker_{worker_id}.log")
 
@@ -101,6 +100,9 @@ def __init__(
             self.init_heart_beat()
 
     def generate_stream_gate(self, params):
+        if self.device == "npu":
+            import torch_npu
+            torch_npu.npu.set_device("npu:0")
         self.call_ct += 1
 
         try:
@@ -216,8 +218,8 @@ def get_embeddings(self, params):
                 all_embeddings = []
                 all_token_num = 0
                 for i in range(0, input_ids.size(1), self.context_len):
-                    chunk_input_ids = input_ids[:, i : i + self.context_len]
-                    chunk_attention_mask = attention_mask[:, i : i + self.context_len]
+                    chunk_input_ids = input_ids[:, i: i + self.context_len]
+                    chunk_attention_mask = attention_mask[:, i: i + self.context_len]
 
                     chunk_embeddings, token_num = self.__process_embed_chunk(
                         chunk_input_ids, chunk_attention_mask, **model_type_dict