Skip to content

Commit f98f3f5

Browse files
Merge pull request #33 from menloresearch/update-dev-from-master-2025-03-28-00-08
Sync master with upstream release b4980
2 parents 04621a6 + 2099a9d commit f98f3f5

26 files changed

+2838
-781
lines changed

common/arg.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1979,7 +1979,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
19791979
).set_examples({LLAMA_EXAMPLE_EMBEDDING}));
19801980
add_opt(common_arg(
19811981
{"--host"}, "HOST",
1982-
string_format("ip address to listen (default: %s)", params.hostname.c_str()),
1982+
string_format("ip address to listen, or bind to an UNIX socket if the address ends with .sock (default: %s)", params.hostname.c_str()),
19831983
[](common_params & params, const std::string & value) {
19841984
params.hostname = value;
19851985
}

convert_hf_to_gguf.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2269,7 +2269,7 @@ def set_gguf_parameters(self):
22692269
self.gguf_writer.add_rope_scaling_orig_ctx_len(self.hparams["rope_scaling"]["original_max_position_embeddings"])
22702270

22712271

2272-
@Model.register("Qwen2VLForConditionalGeneration")
2272+
@Model.register("Qwen2VLForConditionalGeneration", "Qwen2_5_VLForConditionalGeneration")
22732273
class Qwen2VLModel(Model):
22742274
model_arch = gguf.MODEL_ARCH.QWEN2VL
22752275

@@ -4419,6 +4419,29 @@ def prepare_tensors(self):
44194419
raise ValueError(f"Unprocessed experts: {experts}")
44204420

44214421

4422+
@Model.register("PLMForCausalLM")
class PLMModel(Model):
    """Converter for PLM causal LMs (HF ``PLMForCausalLM``) to GGUF.

    Registers the checkpoint under the GGUF ``PLM`` architecture and writes
    the attention-geometry metadata the runtime needs: vocab size, the KV
    LoRA rank (kv_lora_rank suggests latent/compressed KV attention — the
    hparams split the head into a non-rotary and a rotary part), and the
    key/value head widths.
    """

    model_arch = gguf.MODEL_ARCH.PLM

    def set_vocab(self):
        # PLM ships a GPT-2 style BPE tokenizer.
        self._set_vocab_gpt2()

    def set_gguf_parameters(self):
        super().set_gguf_parameters()
        hparams = self.hparams
        self.gguf_writer.add_vocab_size(hparams["vocab_size"])
        self.gguf_writer.add_kv_lora_rank(hparams["kv_lora_rank"])
        # Full per-head key width = non-rotary part + rotary part.
        self.gguf_writer.add_key_length(hparams["qk_nope_head_dim"] + hparams["qk_rope_head_dim"])
        self.gguf_writer.add_value_length(hparams["v_head_dim"])
        # Only the rope portion of the head carries rotary embeddings.
        self.gguf_writer.add_rope_dimension_count(hparams["qk_rope_head_dim"])

    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
        # No tensor surgery required for PLM: a straight 1:1 name mapping
        # is sufficient; `bid` (block id) is unused here.
        return [(self.map_tensor_name(name), data_torch)]

    # NOTE(review): the original diff also defined
    #     def prepare_tensors(self): super().prepare_tensors()
    # which is a pure pass-through; it is removed here since the inherited
    # Model.prepare_tensors is invoked identically without the override.
44224445
@Model.register("T5WithLMHeadModel")
44234446
@Model.register("T5ForConditionalGeneration")
44244447
@Model.register("MT5ForConditionalGeneration")

0 commit comments

Comments
 (0)