diff --git a/CMakeLists.txt b/CMakeLists.txt index e493b63f5..64ce36ed1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -634,6 +634,7 @@ if(LIBUV_BUILD_TESTS) test/test-socket-buffer-size.c test/test-spawn.c test/test-stdio-over-pipes.c + test/test-pipe-read-cancel-race.c test/test-strscpy.c test/test-strtok.c test/test-tcp-alloc-cb-fail.c diff --git a/Makefile.am b/Makefile.am index 87a0b0532..dcb9361d7 100644 --- a/Makefile.am +++ b/Makefile.am @@ -259,6 +259,7 @@ test_run_tests_SOURCES = test/blackhole-server.c \ test/test-socket-buffer-size.c \ test/test-spawn.c \ test/test-stdio-over-pipes.c \ + test/test-pipe-read-cancel-race.c \ test/test-strscpy.c \ test/test-strtok.c \ test/test-tcp-alloc-cb-fail.c \ diff --git a/src/win/pipe.c b/src/win/pipe.c index 8f86a1fee..cbb1572f8 100644 --- a/src/win/pipe.c +++ b/src/win/pipe.c @@ -2013,7 +2013,34 @@ static int uv__pipe_read_data(uv_loop_t* loop, } } } - more = *bytes_read == max_bytes; + /* Only indicate that more data may be available if we actually verify it. + * When bytes_read == max_bytes, the caller will loop back and call + * ReadFile again. If the pipe is empty at that point, ReadFile returns + * ERROR_IO_PENDING and we CancelIoEx the request. There is a Windows + * kernel race where data arriving on the pipe concurrently with the + * CancelIoEx can be drained from the pipe to satisfy the pending read, + * then silently discarded when the cancellation is applied. The read + * reports ERROR_OPERATION_ABORTED with 0 bytes transferred, but the + * data is no longer in the pipe either. + * Use PeekNamedPipe to confirm data is available before looping; if + * so, the next ReadFile will complete synchronously and we never enter + * the CancelIoEx path. */ + if (*bytes_read == max_bytes) { + bytes_available = 0; + if (PeekNamedPipe(handle->handle, + NULL, + 0, + NULL, + &bytes_available, + NULL) && + bytes_available > 0) { + more = 1; + } else { + more = 0; + } + } else { + more = 0; + } } /* Call the read callback. */ diff --git a/test/run-tests.c b/test/run-tests.c index 9c2574ebf..543476a07 100644 --- a/test/run-tests.c +++ b/test/run-tests.c @@ -54,6 +54,7 @@ int ipc_send_recv_helper(void); int ipc_helper_bind_twice(void); int ipc_helper_send_zero(void); int stdio_over_pipes_helper(void); +void pipe_read_cancel_race_helper(void); void spawn_stdin_stdout(void); void process_title_big_argv(void); int spawn_tcp_server_helper(void); @@ -127,6 +128,12 @@ static int maybe_run_test(int argc, char **argv) { return stdio_over_pipes_helper(); } + if (strcmp(argv[1], "pipe_read_cancel_race_helper") == 0) { + notify_parent_process(); + pipe_read_cancel_race_helper(); + return 0; + } + if (strcmp(argv[1], "spawn_helper1") == 0) { notify_parent_process(); return 1; diff --git a/test/test-list.h b/test/test-list.h index 4e8ce1aba..7b346efcd 100644 --- a/test/test-list.h +++ b/test/test-list.h @@ -80,6 +80,7 @@ TEST_DECLARE (tty_pty) TEST_DECLARE (tty_pty_partial) TEST_DECLARE (stdio_over_pipes) TEST_DECLARE (stdio_emulate_iocp) +TEST_DECLARE (pipe_read_cancel_race) TEST_DECLARE (ip6_pton) TEST_DECLARE (ip6_sin6_len) TEST_DECLARE (connect_unspecified) @@ -674,6 +675,7 @@ TASK_LIST_START TEST_ENTRY (tty_pty_partial) TEST_ENTRY (stdio_over_pipes) TEST_ENTRY (stdio_emulate_iocp) + TEST_ENTRY (pipe_read_cancel_race) TEST_ENTRY (ip6_pton) TEST_ENTRY (ip6_sin6_len) TEST_ENTRY (connect_unspecified) diff --git a/test/test-pipe-read-cancel-race.c b/test/test-pipe-read-cancel-race.c new file mode 100644 index 000000000..3d9ddce7d --- /dev/null +++ b/test/test-pipe-read-cancel-race.c @@ -0,0 +1,242 @@ +/* Copyright libuv contributors. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/* This test reproduces a Windows-specific race in the CancelIoEx path of + * uv__pipe_read_data. When a child process writes chunks whose size exactly + * matches the parent's read buffer size, every successful ReadFile fills the + * entire buffer and libuv's read loop continues for another iteration. If the + * pipe is momentarily empty, the next ReadFile returns ERROR_IO_PENDING and is + * cancelled via CancelIoEx. There is a Windows kernel race where data arriving + * concurrently with CancelIoEx can be drained from the pipe's internal buffer + * to satisfy the pending read, then silently discarded when the cancellation + * is applied. The read reports 0 bytes transferred, but the data is gone from + * the pipe -- resulting in data loss. + * + * The repeated spawn + bulk transfer below is designed to hit this race window + * many times. Without the fix it typically fails within a handful of attempts + * on Windows; with the fix it should pass every attempt. + */ + +#include "uv.h" +#include "task.h" + +#include +#include +#include + +/* Chunk size for both the child's writes and the parent's read buffer. Any + * size works as long as both sides agree -- the bug triggers when a ReadFile + * returns exactly the requested byte count, regardless of the specific value. + * Keep it larger than the 64 KiB default pipe buffer to make the race window + * wider: each child write blocks partway through until the parent drains + * the pipe, naturally interleaving writer and reader activity. */ +#define CHUNK_SIZE (70 * 1024) +#define NUM_CHUNKS 300 +#define TOTAL_BYTES ((size_t) CHUNK_SIZE * NUM_CHUNKS) +#define NUM_ATTEMPTS 3 + + +static uv_process_t process; +static uv_pipe_t out_pipe; +static char* read_buf; +static size_t bytes_received; +static int attempt_failed; +static int process_exited; + + +static void close_cb(uv_handle_t* handle) { +} + + +static void exit_cb(uv_process_t* proc, + int64_t exit_status, + int term_signal) { + ASSERT_OK(exit_status); + ASSERT_OK(term_signal); + process_exited = 1; + uv_close((uv_handle_t*) proc, close_cb); +} + + +static void alloc_cb(uv_handle_t* handle, + size_t suggested_size, + uv_buf_t* buf) { + /* Return a buffer of exactly CHUNK_SIZE bytes. When the child writes + * CHUNK_SIZE-byte chunks, ReadFile returns exactly buf->len, the read + * loop continues, and the next iteration is likely to hit an empty pipe. */ + buf->base = read_buf; + buf->len = CHUNK_SIZE; +} + + +static void read_cb(uv_stream_t* stream, + ssize_t nread, + const uv_buf_t* buf) { + if (nread > 0) { + bytes_received += (size_t) nread; + } else if (nread == UV_EOF) { + uv_close((uv_handle_t*) stream, close_cb); + } else if (nread < 0) { + fprintf(stderr, "read_cb error: %s\n", uv_strerror((int) nread)); + attempt_failed = 1; + uv_close((uv_handle_t*) stream, close_cb); + } + /* nread == 0 means EAGAIN / cancelled -- ignore, libuv will retry. */ +} + + +static int run_one_attempt(uv_loop_t* loop) { + uv_process_options_t options; + uv_stdio_container_t stdio[3]; + char exepath[1024]; + size_t exepath_size; + char* args[3]; + int r; + + bytes_received = 0; + attempt_failed = 0; + process_exited = 0; + + exepath_size = sizeof(exepath); + r = uv_exepath(exepath, &exepath_size); + ASSERT_OK(r); + exepath[exepath_size] = '\0'; + + args[0] = exepath; + args[1] = "pipe_read_cancel_race_helper"; + args[2] = NULL; + + r = uv_pipe_init(loop, &out_pipe, 0); + ASSERT_OK(r); + + memset(&options, 0, sizeof(options)); + options.file = exepath; + options.args = args; + options.exit_cb = exit_cb; + options.stdio = stdio; + options.stdio_count = 3; + stdio[0].flags = UV_IGNORE; + stdio[1].flags = UV_CREATE_PIPE | UV_WRITABLE_PIPE; + stdio[1].data.stream = (uv_stream_t*) &out_pipe; + stdio[2].flags = UV_INHERIT_FD; + stdio[2].data.fd = 2; + + r = uv_spawn(loop, &process, &options); + ASSERT_OK(r); + + r = uv_read_start((uv_stream_t*) &out_pipe, alloc_cb, read_cb); + ASSERT_OK(r); + + r = uv_run(loop, UV_RUN_DEFAULT); + ASSERT_OK(r); + + ASSERT_EQ(1, process_exited); + + if (attempt_failed) + return -1; + + if (bytes_received != TOTAL_BYTES) { + fprintf(stderr, + "pipe data loss: expected %zu bytes, received %zu (missing %zu)\n", + TOTAL_BYTES, + bytes_received, + TOTAL_BYTES - bytes_received); + return -1; + } + + return 0; +} + + +TEST_IMPL(pipe_read_cancel_race) { + uv_loop_t* loop; + int attempt; + int r; + + read_buf = malloc(CHUNK_SIZE); + ASSERT_NOT_NULL(read_buf); + + loop = uv_default_loop(); + + for (attempt = 0; attempt < NUM_ATTEMPTS; attempt++) { + r = run_one_attempt(loop); + if (r != 0) { + fprintf(stderr, "attempt %d of %d FAILED\n", attempt + 1, NUM_ATTEMPTS); + free(read_buf); + ASSERT_OK(r); + } + } + + free(read_buf); + MAKE_VALGRIND_HAPPY(loop); + return 0; +} + + +/* Child process helper. Writes NUM_CHUNKS chunks of CHUNK_SIZE bytes to + * stdout using synchronous blocking writes, then exits. The synchronous + * writes are essential: they ensure each write lands as a single contiguous + * chunk from the pipe's perspective, so the parent's ReadFile consistently + * returns exactly CHUNK_SIZE bytes (triggering the more=1 code path). */ +void pipe_read_cancel_race_helper(void) { + uv_loop_t* loop; + uv_pipe_t stdout_pipe; + uv_write_t req; + uv_buf_t buf; + char* chunk; + int i; + int r; + + chunk = malloc(CHUNK_SIZE); + ASSERT_NOT_NULL(chunk); + memset(chunk, 'x', CHUNK_SIZE); + + loop = uv_default_loop(); + + r = uv_pipe_init(loop, &stdout_pipe, 0); + ASSERT_OK(r); + r = uv_pipe_open(&stdout_pipe, 1); + ASSERT_OK(r); + + /* Put the pipe in blocking-write mode so that each uv_write() call + * completes fully before returning control. This is the same approach + * Node.js / Bun use for stdout when it is a Windows pipe, and it + * guarantees the chunk boundary is preserved on the wire. */ + r = uv_stream_set_blocking((uv_stream_t*) &stdout_pipe, 1); + ASSERT_OK(r); + + buf = uv_buf_init(chunk, CHUNK_SIZE); + + for (i = 0; i < NUM_CHUNKS; i++) { + r = uv_write(&req, (uv_stream_t*) &stdout_pipe, &buf, 1, NULL); + ASSERT_OK(r); + /* With blocking mode on Windows, uv_write() performs a synchronous + * WriteFile and does not queue a request to be processed on the loop. + * Run the loop once to be safe on other platforms. */ + uv_run(loop, UV_RUN_NOWAIT); + } + + uv_close((uv_handle_t*) &stdout_pipe, NULL); + uv_run(loop, UV_RUN_DEFAULT); + uv_loop_close(loop); + + free(chunk); +}