rpc : do not wait for response when sending RPC_CMD_SET_TENSOR (llama/12943)

author Radoslav Gerganov <redacted>

Fri, 25 Apr 2025 07:08:08 +0000 (10:08 +0300)

committer Georgi Gerganov <redacted>

Thu, 1 May 2025 10:29:02 +0000 (13:29 +0300)
author Radoslav Gerganov <redacted>
Fri, 25 Apr 2025 07:08:08 +0000 (10:08 +0300)
committer Georgi Gerganov <redacted>
Thu, 1 May 2025 10:29:02 +0000 (13:29 +0300)
diff --git a/ggml/include/ggml-rpc.h b/ggml/include/ggml-rpc.h

index c8b6097f7e5730026ccf05241d8c2cf5b494efa3..1e674112767c9e9f6d641aa3c5fc2ff1629efa69 100644 (file)
--- a/ggml/include/ggml-rpc.h
+++ b/ggml/include/ggml-rpc.h
@@ -7,7 +7,7 @@
  extern "C" {
  #endif
  
-#define RPC_PROTO_MAJOR_VERSION    1
+#define RPC_PROTO_MAJOR_VERSION    2
  #define RPC_PROTO_MINOR_VERSION    0
  #define RPC_PROTO_PATCH_VERSION    0
  #define GGML_RPC_MAX_SERVERS       16
diff --git a/ggml/src/ggml-rpc/ggml-rpc.cpp b/ggml/src/ggml-rpc/ggml-rpc.cpp

index a0667b7d702b2ab7afa56f521c692a73e74b6a60..9023eb0919690ef4cd92f975c9fbe5bf0798dd79 100644 (file)
--- a/ggml/src/ggml-rpc/ggml-rpc.cpp
+++ b/ggml/src/ggml-rpc/ggml-rpc.cpp
@@ -378,8 +378,8 @@ static bool parse_endpoint(const std::string & endpoint, std::string & host, int
  }
  
  // RPC request : | rpc_cmd (1 byte) | request_size (8 bytes) | request_data (request_size bytes) |
-// RPC response: | response_size (8 bytes) | response_data (response_size bytes) |
-static bool send_rpc_cmd(const std::shared_ptr<socket_t> & sock, enum rpc_cmd cmd, const void * input, size_t input_size, void * output, size_t output_size) {
+// No response
+static bool send_rpc_cmd(const std::shared_ptr<socket_t> & sock, enum rpc_cmd cmd, const void * input, size_t input_size) {
      uint8_t cmd_byte = cmd;
      if (!send_data(sock->fd, &cmd_byte, sizeof(cmd_byte))) {
          return false;
@@ -390,6 +390,15 @@ static bool send_rpc_cmd(const std::shared_ptr<socket_t> & sock, enum rpc_cmd cm
      if (!send_data(sock->fd, input, input_size)) {
          return false;
      }
+    return true;
+}
+
+// RPC request : | rpc_cmd (1 byte) | request_size (8 bytes) | request_data (request_size bytes) |
+// RPC response: | response_size (8 bytes) | response_data (response_size bytes) |
+static bool send_rpc_cmd(const std::shared_ptr<socket_t> & sock, enum rpc_cmd cmd, const void * input, size_t input_size, void * output, size_t output_size) {
+    if (!send_rpc_cmd(sock, cmd, input, input_size)) {
+        return false;
+    }
      // TODO: currently the output_size is always known, do we need support for commands with variable output size?
      // even if we do, we can skip sending output_size from the server for commands with known output size
      uint64_t out_size;
@@ -555,7 +564,7 @@ static void ggml_backend_rpc_buffer_set_tensor(ggml_backend_buffer_t buffer, ggm
      memcpy(input.data(), &rpc_tensor, sizeof(rpc_tensor));
      memcpy(input.data() + sizeof(rpc_tensor), &offset, sizeof(offset));
      memcpy(input.data() + sizeof(rpc_tensor) + sizeof(offset), data, size);
-    bool status = send_rpc_cmd(ctx->sock, RPC_CMD_SET_TENSOR, input.data(), input.size(), nullptr, 0);
+    bool status = send_rpc_cmd(ctx->sock, RPC_CMD_SET_TENSOR, input.data(), input.size());
      GGML_ASSERT(status);
  }
  
@@ -1428,9 +1437,6 @@ static void rpc_serve_client(ggml_backend_t backend, const char * cache_dir,
                  if (!server.set_tensor(input)) {
                      return;
                  }
-                if (!send_msg(sockfd, nullptr, 0)) {
-                    return;
-                }
                  break;
              }
              case RPC_CMD_SET_TENSOR_HASH: {
author	Radoslav Gerganov <redacted>
	Fri, 25 Apr 2025 07:08:08 +0000 (10:08 +0300)
committer	Georgi Gerganov <redacted>
	Thu, 1 May 2025 10:29:02 +0000 (13:29 +0300)
ggml/include/ggml-rpc.h		patch | blob | history
ggml/src/ggml-rpc/ggml-rpc.cpp		patch | blob | history