diff --git a/CMakeLists.txt b/CMakeLists.txt index 9aa447dad..cb725a02b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -128,6 +128,14 @@ if(EXISTS "${STDC_SHARED_LIB_SYMLINK}") message(STATUS "STDC_SHARED_LIB: ${STDC_SHARED_LIB}") endif() +option(OB_USE_WEPOLL "Windows: use wepoll (AFD/IOCP) instead of the legacy WSAPoll-based epoll shim" ON) +if(WIN32) + add_subdirectory(deps/oblib/src/lib/wepoll) + if(OB_USE_WEPOLL) + add_compile_definitions(OB_USE_WEPOLL=1) + endif() +endif() + add_subdirectory(deps/oblib) add_subdirectory(src/objit) add_subdirectory(src) diff --git a/NOTICE b/NOTICE index 14e0dcafd..8e143fc91 100644 --- a/NOTICE +++ b/NOTICE @@ -224,3 +224,34 @@ All rights reserved. This source code is licensed under the BSD-style license found in the LICENSE file in the root directory of this source tree. An additional grant of patent rights can be found in the PATENTS file in the same directory + +==============================wepoll============================== + +wepoll - epoll for Windows +https://github.com/piscisaureus/wepoll + +Copyright 2012-2020, Bert Belder +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/deps/oblib/src/CMakeLists.txt b/deps/oblib/src/CMakeLists.txt index d91adb147..6ca7fbdc9 100644 --- a/deps/oblib/src/CMakeLists.txt +++ b/deps/oblib/src/CMakeLists.txt @@ -43,6 +43,7 @@ target_include_directories( $<$:${VCPKG_INCLUDE_DIR}/mysql/mysql> $<$:${SQLITE_COMPAT_DIR}> $<$:${OB_VSAG_DIR}/include> + $<$,$>:${CMAKE_SOURCE_DIR}/deps/oblib/src/lib/wepoll> ${DEVTOOLS_DIR} ${DEVTOOLS_DIR}/include ${DEP_DIR}/include/apr-1/ @@ -100,7 +101,7 @@ if (OB_USE_CLANG) -Wno-ambiguous-reversed-operator -Wno-invalid-partial-specialization -Wno-string-concatenation /EHsc -Wno-missing-noreturn -Wno-undefined-func-template - -Wno-undef -Wno-sign-conversion -Wno-strict-prototypes -Wno-switch-default -Wno-language-extension-token + -Wno-undef -Wno-sign-conversion -Wno-strict-prototypes -Wno-switch-default -Wno-language-extension-token -Wno-missing-prototypes -Wno-extra-semi-stmt -Wno-unused-macros -Wno-unsafe-buffer-usage -Wno-c++98-compat -Wno-c++98-compat-pedantic -Wno-reserved-identifier -Wno-padded -Wno-variadic-macros -Wno-global-constructors -Wno-undefined-reinterpret-cast -Wno-tautological-unsigned-zero-compare -Wno-integer-overflow -Wno-shift-count-overflow -Wno-cast-function-type -Wno-cast-function-type-strict @@ -110,8 +111,8 @@ if (OB_USE_CLANG) -Wno-exit-time-destructors -Wno-inconsistent-missing-destructor-override -Wno-shadow -Wno-unreachable-code -Wno-switch-enum -Wno-atomic-implicit-seq-cst -Wno-documentation-unknown-command -Wno-implicit-int-conversion -Wno-gnu-conditional-omitted-operand -Wno-cast-align -Wno-comma -Wno-tautological-type-limit-compare - -Wno-disabled-macro-expansion -Wno-unreachable-code-break -Wno-missing-variable-declarations - -Wno-nonportable-system-include-path -Wno-c99-extensions -Wno-flexible-array-extensions + -Wno-disabled-macro-expansion -Wno-unreachable-code-break -Wno-missing-variable-declarations + -Wno-nonportable-system-include-path -Wno-c99-extensions -Wno-flexible-array-extensions -Wno-compound-token-split-by-space -Wno-newline-eof -Wno-shadow-field -Wno-microsoft-unqualified-friend -Wno-float-equal -Wno-used-but-marked-unused -Wno-unreachable-code-return -Wno-float-conversion -Wno-conditional-uninitialized -Wno-signed-enum-bitfield -Wno-bitfield-enum-conversion -Wno-redundant-parens -Wno-gnu-statement-expression diff --git a/deps/oblib/src/lib/net/ob_epoll_compat.h b/deps/oblib/src/lib/net/ob_epoll_compat.h new file mode 100644 index 000000000..7381b5cb4 --- /dev/null +++ b/deps/oblib/src/lib/net/ob_epoll_compat.h @@ -0,0 +1,139 @@ +/** + * Copyright (c) 2024 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the + * Mulan PubL v2. You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY + * KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO + * NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_LIB_NET_OB_EPOLL_COMPAT_H_ +#define OCEANBASE_LIB_NET_OB_EPOLL_COMPAT_H_ + +// Cross-platform thin epoll surface used by ob_sql_nio.cpp. +// +// Linux: forwards to . +// Windows + OB_USE_WEPOLL: forwards to wepoll (AFD/IOCP-backed real epoll). +// Windows without OB_USE_WEPOLL: this header MUST NOT be included; callers +// fall back to the legacy WSAPoll-based shim defined inline in ob_sql_nio.cpp. +// +// Why a side table on Windows: +// - wepoll's HANDLE is wepoll's own port_state_t* (heap pointer), 8 bytes on +// x64. OB stores epfd as `int epfd_` in many places. We map int<->HANDLE +// through a small registry so callers do not change. + +#if defined(_WIN32) && defined(OB_USE_WEPOLL) + +#include +#include +#include +#include "wepoll.h" + +// wepoll already provides: struct epoll_event, epoll_data_t, EPOLLIN, +// EPOLLPRI, EPOLLOUT, EPOLLERR, EPOLLHUP, EPOLLRDHUP, EPOLLONESHOT, +// EPOLL_CTL_ADD/DEL/MOD. It does NOT define: +// +// - EPOLLET: wepoll only supports level-triggered mode. Callers that pass +// EPOLLET expect edge-triggered, but they'd already silently get LT under +// the legacy WSAPoll shim. We define EPOLLET as 0 so call sites compile +// and behave identically to the WSAPoll path. If true ET semantics are +// ever required on Windows, that needs a separate effort. +// - EPOLL_CLOEXEC: irrelevant on Windows (handles do not inherit by default). + +#ifndef EPOLLET +#define EPOLLET 0 +#endif +#ifndef EPOLL_CLOEXEC +#define EPOLL_CLOEXEC 0 +#endif + +namespace oceanbase { +namespace lib { + +class ObWepollRegistry +{ +public: + static ObWepollRegistry &instance() + { + static ObWepollRegistry inst; + return inst; + } + int add(HANDLE h) + { + std::lock_guard lg(mu_); + int id = next_id_++; + map_[id] = h; + return id; + } + HANDLE get(int id) const + { + std::lock_guard lg(mu_); + auto it = map_.find(id); + return it == map_.end() ? NULL : it->second; + } + HANDLE pop(int id) + { + std::lock_guard lg(mu_); + auto it = map_.find(id); + if (it == map_.end()) return NULL; + HANDLE h = it->second; + map_.erase(it); + return h; + } +private: + ObWepollRegistry() : next_id_(0x60000001) {} + mutable std::mutex mu_; + std::unordered_map map_; + int next_id_; +}; + +} // namespace lib +} // namespace oceanbase + +static inline int ob_wepoll_create1(int flags) +{ + HANDLE h = epoll_create1(flags); + if (h == NULL) return -1; + return ::oceanbase::lib::ObWepollRegistry::instance().add(h); +} + +static inline int ob_wepoll_ctl(int epfd, int op, int fd, struct epoll_event *ev) +{ + HANDLE h = ::oceanbase::lib::ObWepollRegistry::instance().get(epfd); + if (h == NULL) return -1; + return epoll_ctl(h, op, (SOCKET)fd, ev); +} + +static inline int ob_wepoll_wait(int epfd, struct epoll_event *events, + int maxevents, int timeout) +{ + HANDLE h = ::oceanbase::lib::ObWepollRegistry::instance().get(epfd); + if (h == NULL) return -1; + return epoll_wait(h, events, maxevents, timeout); +} + +static inline int ob_wepoll_close(int epfd) +{ + HANDLE h = ::oceanbase::lib::ObWepollRegistry::instance().pop(epfd); + if (h == NULL) return -1; + return epoll_close(h); +} + +// Override the libc names within this translation unit's Windows branch so +// existing call sites compile unchanged. epoll_close is intentionally NOT +// macro-redefined: closesocket()-style cleanup is done via ob_wepoll_close +// from explicit teardown paths. +#define epoll_create1 ob_wepoll_create1 +#define epoll_ctl ob_wepoll_ctl +#define epoll_wait ob_wepoll_wait + +#elif !defined(_WIN32) + +#include + +#endif // _WIN32 + OB_USE_WEPOLL + +#endif // OCEANBASE_LIB_NET_OB_EPOLL_COMPAT_H_ diff --git a/deps/oblib/src/lib/wepoll/CMakeLists.txt b/deps/oblib/src/lib/wepoll/CMakeLists.txt new file mode 100644 index 000000000..915dbe410 --- /dev/null +++ b/deps/oblib/src/lib/wepoll/CMakeLists.txt @@ -0,0 +1,24 @@ +if(NOT WIN32) + return() +endif() + +set(_wepoll_src "${CMAKE_CURRENT_SOURCE_DIR}/wepoll.c") +set(_wepoll_hdr "${CMAKE_CURRENT_SOURCE_DIR}/wepoll.h") +if(NOT EXISTS "${_wepoll_src}" OR NOT EXISTS "${_wepoll_hdr}") + message(FATAL_ERROR + "wepoll source not found.\n" + " Drop wepoll.h and wepoll.c (BSD-2-Clause) from\n" + " https://github.com/piscisaureus/wepoll into ${CMAKE_CURRENT_SOURCE_DIR}/\n" + " See deps/oblib/src/lib/wepoll/README.md.") +endif() + +add_library(wepoll STATIC "${_wepoll_src}") +target_include_directories(wepoll PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}") +# wepoll resolves AFD ioctls at runtime via ntdll +target_link_libraries(wepoll PUBLIC ws2_32 ntdll) +# wepoll.c is third-party; do not let project -Werror flags fail its build +if(MSVC) + target_compile_options(wepoll PRIVATE /w) +else() + target_compile_options(wepoll PRIVATE -w) +endif() diff --git a/deps/oblib/src/lib/wepoll/README.md b/deps/oblib/src/lib/wepoll/README.md new file mode 100644 index 000000000..30e80373c --- /dev/null +++ b/deps/oblib/src/lib/wepoll/README.md @@ -0,0 +1,34 @@ +# wepoll + +Windows-only epoll implementation backed by AFD/IOCP. Used by OceanBase on +Windows to provide the `epoll_*` API surface that `ob_sql_nio.cpp` and friends +rely on. + +## Vendoring + +Two files are expected here: + +- `wepoll.h` +- `wepoll.c` + +Drop the latest tagged release from + (BSD-2-Clause) into this directory. +Do not modify; treat as upstream. + +## Build + +`CMakeLists.txt` in this directory builds a static `wepoll` library on Windows +only. It fails fast at configure time if the sources are missing so we do not +silently link an empty stub. + +Enable use of wepoll across the tree by passing `-DOB_USE_WEPOLL=ON` to CMake. +When unset, the legacy `WSAPoll`-based shim in `ob_sql_nio.cpp` remains the +default so we can A/B compare. + +## License + +BSD-2-Clause. Record the version drop here when updating: + +| Version | Date | Notes | +|---------|------|-------| +| | | | diff --git a/deps/oblib/src/lib/wepoll/wepoll.c b/deps/oblib/src/lib/wepoll/wepoll.c new file mode 100644 index 000000000..ab7f24045 --- /dev/null +++ b/deps/oblib/src/lib/wepoll/wepoll.c @@ -0,0 +1,2253 @@ +/* + * wepoll - epoll for Windows + * https://github.com/piscisaureus/wepoll + * + * Copyright 2012-2020, Bert Belder + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef WEPOLL_EXPORT +#define WEPOLL_EXPORT +#endif + +#include + +enum EPOLL_EVENTS { + EPOLLIN = (int) (1U << 0), + EPOLLPRI = (int) (1U << 1), + EPOLLOUT = (int) (1U << 2), + EPOLLERR = (int) (1U << 3), + EPOLLHUP = (int) (1U << 4), + EPOLLRDNORM = (int) (1U << 6), + EPOLLRDBAND = (int) (1U << 7), + EPOLLWRNORM = (int) (1U << 8), + EPOLLWRBAND = (int) (1U << 9), + EPOLLMSG = (int) (1U << 10), /* Never reported. */ + EPOLLRDHUP = (int) (1U << 13), + EPOLLONESHOT = (int) (1U << 31) +}; + +#define EPOLLIN (1U << 0) +#define EPOLLPRI (1U << 1) +#define EPOLLOUT (1U << 2) +#define EPOLLERR (1U << 3) +#define EPOLLHUP (1U << 4) +#define EPOLLRDNORM (1U << 6) +#define EPOLLRDBAND (1U << 7) +#define EPOLLWRNORM (1U << 8) +#define EPOLLWRBAND (1U << 9) +#define EPOLLMSG (1U << 10) +#define EPOLLRDHUP (1U << 13) +#define EPOLLONESHOT (1U << 31) + +#define EPOLL_CTL_ADD 1 +#define EPOLL_CTL_MOD 2 +#define EPOLL_CTL_DEL 3 + +typedef void* HANDLE; +typedef uintptr_t SOCKET; + +typedef union epoll_data { + void* ptr; + int fd; + uint32_t u32; + uint64_t u64; + SOCKET sock; /* Windows specific */ + HANDLE hnd; /* Windows specific */ +} epoll_data_t; + +struct epoll_event { + uint32_t events; /* Epoll events and flags */ + epoll_data_t data; /* User data variable */ +}; + +#ifdef __cplusplus +extern "C" { +#endif + +WEPOLL_EXPORT HANDLE epoll_create(int size); +WEPOLL_EXPORT HANDLE epoll_create1(int flags); + +WEPOLL_EXPORT int epoll_close(HANDLE ephnd); + +WEPOLL_EXPORT int epoll_ctl(HANDLE ephnd, + int op, + SOCKET sock, + struct epoll_event* event); + +WEPOLL_EXPORT int epoll_wait(HANDLE ephnd, + struct epoll_event* events, + int maxevents, + int timeout); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#include + +#include + +#define WEPOLL_INTERNAL static +#define WEPOLL_INTERNAL_EXTERN static + +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wnonportable-system-include-path" +#pragma clang diagnostic ignored "-Wreserved-id-macro" +#elif defined(_MSC_VER) +#pragma warning(push, 1) +#endif + +#undef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN + +#undef _WIN32_WINNT +#define _WIN32_WINNT 0x0600 + +#include +#include +#include + +#if defined(__clang__) +#pragma clang diagnostic pop +#elif defined(_MSC_VER) +#pragma warning(pop) +#endif + +WEPOLL_INTERNAL int nt_global_init(void); + +typedef LONG NTSTATUS; +typedef NTSTATUS* PNTSTATUS; + +#ifndef NT_SUCCESS +#define NT_SUCCESS(status) (((NTSTATUS)(status)) >= 0) +#endif + +#ifndef STATUS_SUCCESS +#define STATUS_SUCCESS ((NTSTATUS) 0x00000000L) +#endif + +#ifndef STATUS_PENDING +#define STATUS_PENDING ((NTSTATUS) 0x00000103L) +#endif + +#ifndef STATUS_CANCELLED +#define STATUS_CANCELLED ((NTSTATUS) 0xC0000120L) +#endif + +#ifndef STATUS_NOT_FOUND +#define STATUS_NOT_FOUND ((NTSTATUS) 0xC0000225L) +#endif + +typedef struct _IO_STATUS_BLOCK { + NTSTATUS Status; + ULONG_PTR Information; +} IO_STATUS_BLOCK, *PIO_STATUS_BLOCK; + +typedef VOID(NTAPI* PIO_APC_ROUTINE)(PVOID ApcContext, + PIO_STATUS_BLOCK IoStatusBlock, + ULONG Reserved); + +typedef struct _UNICODE_STRING { + USHORT Length; + USHORT MaximumLength; + PWSTR Buffer; +} UNICODE_STRING, *PUNICODE_STRING; + +#define RTL_CONSTANT_STRING(s) \ + { sizeof(s) - sizeof((s)[0]), sizeof(s), s } + +typedef struct _OBJECT_ATTRIBUTES { + ULONG Length; + HANDLE RootDirectory; + PUNICODE_STRING ObjectName; + ULONG Attributes; + PVOID SecurityDescriptor; + PVOID SecurityQualityOfService; +} OBJECT_ATTRIBUTES, *POBJECT_ATTRIBUTES; + +#define RTL_CONSTANT_OBJECT_ATTRIBUTES(ObjectName, Attributes) \ + { sizeof(OBJECT_ATTRIBUTES), NULL, ObjectName, Attributes, NULL, NULL } + +#ifndef FILE_OPEN +#define FILE_OPEN 0x00000001UL +#endif + +#define KEYEDEVENT_WAIT 0x00000001UL +#define KEYEDEVENT_WAKE 0x00000002UL +#define KEYEDEVENT_ALL_ACCESS \ + (STANDARD_RIGHTS_REQUIRED | KEYEDEVENT_WAIT | KEYEDEVENT_WAKE) + +#define NT_NTDLL_IMPORT_LIST(X) \ + X(NTSTATUS, \ + NTAPI, \ + NtCancelIoFileEx, \ + (HANDLE FileHandle, \ + PIO_STATUS_BLOCK IoRequestToCancel, \ + PIO_STATUS_BLOCK IoStatusBlock)) \ + \ + X(NTSTATUS, \ + NTAPI, \ + NtCreateFile, \ + (PHANDLE FileHandle, \ + ACCESS_MASK DesiredAccess, \ + POBJECT_ATTRIBUTES ObjectAttributes, \ + PIO_STATUS_BLOCK IoStatusBlock, \ + PLARGE_INTEGER AllocationSize, \ + ULONG FileAttributes, \ + ULONG ShareAccess, \ + ULONG CreateDisposition, \ + ULONG CreateOptions, \ + PVOID EaBuffer, \ + ULONG EaLength)) \ + \ + X(NTSTATUS, \ + NTAPI, \ + NtCreateKeyedEvent, \ + (PHANDLE KeyedEventHandle, \ + ACCESS_MASK DesiredAccess, \ + POBJECT_ATTRIBUTES ObjectAttributes, \ + ULONG Flags)) \ + \ + X(NTSTATUS, \ + NTAPI, \ + NtDeviceIoControlFile, \ + (HANDLE FileHandle, \ + HANDLE Event, \ + PIO_APC_ROUTINE ApcRoutine, \ + PVOID ApcContext, \ + PIO_STATUS_BLOCK IoStatusBlock, \ + ULONG IoControlCode, \ + PVOID InputBuffer, \ + ULONG InputBufferLength, \ + PVOID OutputBuffer, \ + ULONG OutputBufferLength)) \ + \ + X(NTSTATUS, \ + NTAPI, \ + NtReleaseKeyedEvent, \ + (HANDLE KeyedEventHandle, \ + PVOID KeyValue, \ + BOOLEAN Alertable, \ + PLARGE_INTEGER Timeout)) \ + \ + X(NTSTATUS, \ + NTAPI, \ + NtWaitForKeyedEvent, \ + (HANDLE KeyedEventHandle, \ + PVOID KeyValue, \ + BOOLEAN Alertable, \ + PLARGE_INTEGER Timeout)) \ + \ + X(ULONG, WINAPI, RtlNtStatusToDosError, (NTSTATUS Status)) + +#define X(return_type, attributes, name, parameters) \ + WEPOLL_INTERNAL_EXTERN return_type(attributes* name) parameters; +NT_NTDLL_IMPORT_LIST(X) +#undef X + +#define AFD_POLL_RECEIVE 0x0001 +#define AFD_POLL_RECEIVE_EXPEDITED 0x0002 +#define AFD_POLL_SEND 0x0004 +#define AFD_POLL_DISCONNECT 0x0008 +#define AFD_POLL_ABORT 0x0010 +#define AFD_POLL_LOCAL_CLOSE 0x0020 +#define AFD_POLL_ACCEPT 0x0080 +#define AFD_POLL_CONNECT_FAIL 0x0100 + +typedef struct _AFD_POLL_HANDLE_INFO { + HANDLE Handle; + ULONG Events; + NTSTATUS Status; +} AFD_POLL_HANDLE_INFO, *PAFD_POLL_HANDLE_INFO; + +typedef struct _AFD_POLL_INFO { + LARGE_INTEGER Timeout; + ULONG NumberOfHandles; + ULONG Exclusive; + AFD_POLL_HANDLE_INFO Handles[1]; +} AFD_POLL_INFO, *PAFD_POLL_INFO; + +WEPOLL_INTERNAL int afd_create_device_handle(HANDLE iocp_handle, + HANDLE* afd_device_handle_out); + +WEPOLL_INTERNAL int afd_poll(HANDLE afd_device_handle, + AFD_POLL_INFO* poll_info, + IO_STATUS_BLOCK* io_status_block); +WEPOLL_INTERNAL int afd_cancel_poll(HANDLE afd_device_handle, + IO_STATUS_BLOCK* io_status_block); + +#define return_map_error(value) \ + do { \ + err_map_win_error(); \ + return (value); \ + } while (0) + +#define return_set_error(value, error) \ + do { \ + err_set_win_error(error); \ + return (value); \ + } while (0) + +WEPOLL_INTERNAL void err_map_win_error(void); +WEPOLL_INTERNAL void err_set_win_error(DWORD error); +WEPOLL_INTERNAL int err_check_handle(HANDLE handle); + +#define IOCTL_AFD_POLL 0x00012024 + +static UNICODE_STRING afd__device_name = + RTL_CONSTANT_STRING(L"\\Device\\Afd\\Wepoll"); + +static OBJECT_ATTRIBUTES afd__device_attributes = + RTL_CONSTANT_OBJECT_ATTRIBUTES(&afd__device_name, 0); + +int afd_create_device_handle(HANDLE iocp_handle, + HANDLE* afd_device_handle_out) { + HANDLE afd_device_handle; + IO_STATUS_BLOCK iosb; + NTSTATUS status; + + /* By opening \Device\Afd without specifying any extended attributes, we'll + * get a handle that lets us talk to the AFD driver, but that doesn't have an + * associated endpoint (so it's not a socket). */ + status = NtCreateFile(&afd_device_handle, + SYNCHRONIZE, + &afd__device_attributes, + &iosb, + NULL, + 0, + FILE_SHARE_READ | FILE_SHARE_WRITE, + FILE_OPEN, + 0, + NULL, + 0); + if (status != STATUS_SUCCESS) + return_set_error(-1, RtlNtStatusToDosError(status)); + + if (CreateIoCompletionPort(afd_device_handle, iocp_handle, 0, 0) == NULL) + goto error; + + if (!SetFileCompletionNotificationModes(afd_device_handle, + FILE_SKIP_SET_EVENT_ON_HANDLE)) + goto error; + + *afd_device_handle_out = afd_device_handle; + return 0; + +error: + CloseHandle(afd_device_handle); + return_map_error(-1); +} + +int afd_poll(HANDLE afd_device_handle, + AFD_POLL_INFO* poll_info, + IO_STATUS_BLOCK* io_status_block) { + NTSTATUS status; + + /* Blocking operation is not supported. */ + assert(io_status_block != NULL); + + io_status_block->Status = STATUS_PENDING; + status = NtDeviceIoControlFile(afd_device_handle, + NULL, + NULL, + io_status_block, + io_status_block, + IOCTL_AFD_POLL, + poll_info, + sizeof *poll_info, + poll_info, + sizeof *poll_info); + + if (status == STATUS_SUCCESS) + return 0; + else if (status == STATUS_PENDING) + return_set_error(-1, ERROR_IO_PENDING); + else + return_set_error(-1, RtlNtStatusToDosError(status)); +} + +int afd_cancel_poll(HANDLE afd_device_handle, + IO_STATUS_BLOCK* io_status_block) { + NTSTATUS cancel_status; + IO_STATUS_BLOCK cancel_iosb; + + /* If the poll operation has already completed or has been cancelled earlier, + * there's nothing left for us to do. */ + if (io_status_block->Status != STATUS_PENDING) + return 0; + + cancel_status = + NtCancelIoFileEx(afd_device_handle, io_status_block, &cancel_iosb); + + /* NtCancelIoFileEx() may return STATUS_NOT_FOUND if the operation completed + * just before calling NtCancelIoFileEx(). This is not an error. */ + if (cancel_status == STATUS_SUCCESS || cancel_status == STATUS_NOT_FOUND) + return 0; + else + return_set_error(-1, RtlNtStatusToDosError(cancel_status)); +} + +WEPOLL_INTERNAL int epoll_global_init(void); + +WEPOLL_INTERNAL int init(void); + +typedef struct port_state port_state_t; +typedef struct queue queue_t; +typedef struct sock_state sock_state_t; +typedef struct ts_tree_node ts_tree_node_t; + +WEPOLL_INTERNAL port_state_t* port_new(HANDLE* iocp_handle_out); +WEPOLL_INTERNAL int port_close(port_state_t* port_state); +WEPOLL_INTERNAL int port_delete(port_state_t* port_state); + +WEPOLL_INTERNAL int port_wait(port_state_t* port_state, + struct epoll_event* events, + int maxevents, + int timeout); + +WEPOLL_INTERNAL int port_ctl(port_state_t* port_state, + int op, + SOCKET sock, + struct epoll_event* ev); + +WEPOLL_INTERNAL int port_register_socket(port_state_t* port_state, + sock_state_t* sock_state, + SOCKET socket); +WEPOLL_INTERNAL void port_unregister_socket(port_state_t* port_state, + sock_state_t* sock_state); +WEPOLL_INTERNAL sock_state_t* port_find_socket(port_state_t* port_state, + SOCKET socket); + +WEPOLL_INTERNAL void port_request_socket_update(port_state_t* port_state, + sock_state_t* sock_state); +WEPOLL_INTERNAL void port_cancel_socket_update(port_state_t* port_state, + sock_state_t* sock_state); + +WEPOLL_INTERNAL void port_add_deleted_socket(port_state_t* port_state, + sock_state_t* sock_state); +WEPOLL_INTERNAL void port_remove_deleted_socket(port_state_t* port_state, + sock_state_t* sock_state); + +WEPOLL_INTERNAL HANDLE port_get_iocp_handle(port_state_t* port_state); +WEPOLL_INTERNAL queue_t* port_get_poll_group_queue(port_state_t* port_state); + +WEPOLL_INTERNAL port_state_t* port_state_from_handle_tree_node( + ts_tree_node_t* tree_node); +WEPOLL_INTERNAL ts_tree_node_t* port_state_to_handle_tree_node( + port_state_t* port_state); + +/* The reflock is a special kind of lock that normally prevents a chunk of + * memory from being freed, but does allow the chunk of memory to eventually be + * released in a coordinated fashion. + * + * Under normal operation, threads increase and decrease the reference count, + * which are wait-free operations. + * + * Exactly once during the reflock's lifecycle, a thread holding a reference to + * the lock may "destroy" the lock; this operation blocks until all other + * threads holding a reference to the lock have dereferenced it. After + * "destroy" returns, the calling thread may assume that no other threads have + * a reference to the lock. + * + * Attemmpting to lock or destroy a lock after reflock_unref_and_destroy() has + * been called is invalid and results in undefined behavior. Therefore the user + * should use another lock to guarantee that this can't happen. + */ + +typedef struct reflock { + volatile long state; /* 32-bit Interlocked APIs operate on `long` values. */ +} reflock_t; + +WEPOLL_INTERNAL int reflock_global_init(void); + +WEPOLL_INTERNAL void reflock_init(reflock_t* reflock); +WEPOLL_INTERNAL void reflock_ref(reflock_t* reflock); +WEPOLL_INTERNAL void reflock_unref(reflock_t* reflock); +WEPOLL_INTERNAL void reflock_unref_and_destroy(reflock_t* reflock); + +#include + +/* N.b.: the tree functions do not set errno or LastError when they fail. Each + * of the API functions has at most one failure mode. It is up to the caller to + * set an appropriate error code when necessary. */ + +typedef struct tree tree_t; +typedef struct tree_node tree_node_t; + +typedef struct tree { + tree_node_t* root; +} tree_t; + +typedef struct tree_node { + tree_node_t* left; + tree_node_t* right; + tree_node_t* parent; + uintptr_t key; + bool red; +} tree_node_t; + +WEPOLL_INTERNAL void tree_init(tree_t* tree); +WEPOLL_INTERNAL void tree_node_init(tree_node_t* node); + +WEPOLL_INTERNAL int tree_add(tree_t* tree, tree_node_t* node, uintptr_t key); +WEPOLL_INTERNAL void tree_del(tree_t* tree, tree_node_t* node); + +WEPOLL_INTERNAL tree_node_t* tree_find(const tree_t* tree, uintptr_t key); +WEPOLL_INTERNAL tree_node_t* tree_root(const tree_t* tree); + +typedef struct ts_tree { + tree_t tree; + SRWLOCK lock; +} ts_tree_t; + +typedef struct ts_tree_node { + tree_node_t tree_node; + reflock_t reflock; +} ts_tree_node_t; + +WEPOLL_INTERNAL void ts_tree_init(ts_tree_t* rtl); +WEPOLL_INTERNAL void ts_tree_node_init(ts_tree_node_t* node); + +WEPOLL_INTERNAL int ts_tree_add(ts_tree_t* ts_tree, + ts_tree_node_t* node, + uintptr_t key); + +WEPOLL_INTERNAL ts_tree_node_t* ts_tree_del_and_ref(ts_tree_t* ts_tree, + uintptr_t key); +WEPOLL_INTERNAL ts_tree_node_t* ts_tree_find_and_ref(ts_tree_t* ts_tree, + uintptr_t key); + +WEPOLL_INTERNAL void ts_tree_node_unref(ts_tree_node_t* node); +WEPOLL_INTERNAL void ts_tree_node_unref_and_destroy(ts_tree_node_t* node); + +static ts_tree_t epoll__handle_tree; + +int epoll_global_init(void) { + ts_tree_init(&epoll__handle_tree); + return 0; +} + +static HANDLE epoll__create(void) { + port_state_t* port_state; + HANDLE ephnd; + ts_tree_node_t* tree_node; + + if (init() < 0) + return NULL; + + port_state = port_new(&ephnd); + if (port_state == NULL) + return NULL; + + tree_node = port_state_to_handle_tree_node(port_state); + if (ts_tree_add(&epoll__handle_tree, tree_node, (uintptr_t) ephnd) < 0) { + /* This should never happen. */ + port_delete(port_state); + return_set_error(NULL, ERROR_ALREADY_EXISTS); + } + + return ephnd; +} + +HANDLE epoll_create(int size) { + if (size <= 0) + return_set_error(NULL, ERROR_INVALID_PARAMETER); + + return epoll__create(); +} + +HANDLE epoll_create1(int flags) { + if (flags != 0) + return_set_error(NULL, ERROR_INVALID_PARAMETER); + + return epoll__create(); +} + +int epoll_close(HANDLE ephnd) { + ts_tree_node_t* tree_node; + port_state_t* port_state; + + if (init() < 0) + return -1; + + tree_node = ts_tree_del_and_ref(&epoll__handle_tree, (uintptr_t) ephnd); + if (tree_node == NULL) { + err_set_win_error(ERROR_INVALID_PARAMETER); + goto err; + } + + port_state = port_state_from_handle_tree_node(tree_node); + port_close(port_state); + + ts_tree_node_unref_and_destroy(tree_node); + + return port_delete(port_state); + +err: + err_check_handle(ephnd); + return -1; +} + +int epoll_ctl(HANDLE ephnd, int op, SOCKET sock, struct epoll_event* ev) { + ts_tree_node_t* tree_node; + port_state_t* port_state; + int r; + + if (init() < 0) + return -1; + + tree_node = ts_tree_find_and_ref(&epoll__handle_tree, (uintptr_t) ephnd); + if (tree_node == NULL) { + err_set_win_error(ERROR_INVALID_PARAMETER); + goto err; + } + + port_state = port_state_from_handle_tree_node(tree_node); + r = port_ctl(port_state, op, sock, ev); + + ts_tree_node_unref(tree_node); + + if (r < 0) + goto err; + + return 0; + +err: + /* On Linux, in the case of epoll_ctl(), EBADF takes priority over other + * errors. Wepoll mimics this behavior. */ + err_check_handle(ephnd); + err_check_handle((HANDLE) sock); + return -1; +} + +int epoll_wait(HANDLE ephnd, + struct epoll_event* events, + int maxevents, + int timeout) { + ts_tree_node_t* tree_node; + port_state_t* port_state; + int num_events; + + if (maxevents <= 0) + return_set_error(-1, ERROR_INVALID_PARAMETER); + + if (init() < 0) + return -1; + + tree_node = ts_tree_find_and_ref(&epoll__handle_tree, (uintptr_t) ephnd); + if (tree_node == NULL) { + err_set_win_error(ERROR_INVALID_PARAMETER); + goto err; + } + + port_state = port_state_from_handle_tree_node(tree_node); + num_events = port_wait(port_state, events, maxevents, timeout); + + ts_tree_node_unref(tree_node); + + if (num_events < 0) + goto err; + + return num_events; + +err: + err_check_handle(ephnd); + return -1; +} + +#include + +#define ERR__ERRNO_MAPPINGS(X) \ + X(ERROR_ACCESS_DENIED, EACCES) \ + X(ERROR_ALREADY_EXISTS, EEXIST) \ + X(ERROR_BAD_COMMAND, EACCES) \ + X(ERROR_BAD_EXE_FORMAT, ENOEXEC) \ + X(ERROR_BAD_LENGTH, EACCES) \ + X(ERROR_BAD_NETPATH, ENOENT) \ + X(ERROR_BAD_NET_NAME, ENOENT) \ + X(ERROR_BAD_NET_RESP, ENETDOWN) \ + X(ERROR_BAD_PATHNAME, ENOENT) \ + X(ERROR_BROKEN_PIPE, EPIPE) \ + X(ERROR_CANNOT_MAKE, EACCES) \ + X(ERROR_COMMITMENT_LIMIT, ENOMEM) \ + X(ERROR_CONNECTION_ABORTED, ECONNABORTED) \ + X(ERROR_CONNECTION_ACTIVE, EISCONN) \ + X(ERROR_CONNECTION_REFUSED, ECONNREFUSED) \ + X(ERROR_CRC, EACCES) \ + X(ERROR_DIR_NOT_EMPTY, ENOTEMPTY) \ + X(ERROR_DISK_FULL, ENOSPC) \ + X(ERROR_DUP_NAME, EADDRINUSE) \ + X(ERROR_FILENAME_EXCED_RANGE, ENOENT) \ + X(ERROR_FILE_NOT_FOUND, ENOENT) \ + X(ERROR_GEN_FAILURE, EACCES) \ + X(ERROR_GRACEFUL_DISCONNECT, EPIPE) \ + X(ERROR_HOST_DOWN, EHOSTUNREACH) \ + X(ERROR_HOST_UNREACHABLE, EHOSTUNREACH) \ + X(ERROR_INSUFFICIENT_BUFFER, EFAULT) \ + X(ERROR_INVALID_ADDRESS, EADDRNOTAVAIL) \ + X(ERROR_INVALID_FUNCTION, EINVAL) \ + X(ERROR_INVALID_HANDLE, EBADF) \ + X(ERROR_INVALID_NETNAME, EADDRNOTAVAIL) \ + X(ERROR_INVALID_PARAMETER, EINVAL) \ + X(ERROR_INVALID_USER_BUFFER, EMSGSIZE) \ + X(ERROR_IO_PENDING, EINPROGRESS) \ + X(ERROR_LOCK_VIOLATION, EACCES) \ + X(ERROR_MORE_DATA, EMSGSIZE) \ + X(ERROR_NETNAME_DELETED, ECONNABORTED) \ + X(ERROR_NETWORK_ACCESS_DENIED, EACCES) \ + X(ERROR_NETWORK_BUSY, ENETDOWN) \ + X(ERROR_NETWORK_UNREACHABLE, ENETUNREACH) \ + X(ERROR_NOACCESS, EFAULT) \ + X(ERROR_NONPAGED_SYSTEM_RESOURCES, ENOMEM) \ + X(ERROR_NOT_ENOUGH_MEMORY, ENOMEM) \ + X(ERROR_NOT_ENOUGH_QUOTA, ENOMEM) \ + X(ERROR_NOT_FOUND, ENOENT) \ + X(ERROR_NOT_LOCKED, EACCES) \ + X(ERROR_NOT_READY, EACCES) \ + X(ERROR_NOT_SAME_DEVICE, EXDEV) \ + X(ERROR_NOT_SUPPORTED, ENOTSUP) \ + X(ERROR_NO_MORE_FILES, ENOENT) \ + X(ERROR_NO_SYSTEM_RESOURCES, ENOMEM) \ + X(ERROR_OPERATION_ABORTED, EINTR) \ + X(ERROR_OUT_OF_PAPER, EACCES) \ + X(ERROR_PAGED_SYSTEM_RESOURCES, ENOMEM) \ + X(ERROR_PAGEFILE_QUOTA, ENOMEM) \ + X(ERROR_PATH_NOT_FOUND, ENOENT) \ + X(ERROR_PIPE_NOT_CONNECTED, EPIPE) \ + X(ERROR_PORT_UNREACHABLE, ECONNRESET) \ + X(ERROR_PROTOCOL_UNREACHABLE, ENETUNREACH) \ + X(ERROR_REM_NOT_LIST, ECONNREFUSED) \ + X(ERROR_REQUEST_ABORTED, EINTR) \ + X(ERROR_REQ_NOT_ACCEP, EWOULDBLOCK) \ + X(ERROR_SECTOR_NOT_FOUND, EACCES) \ + X(ERROR_SEM_TIMEOUT, ETIMEDOUT) \ + X(ERROR_SHARING_VIOLATION, EACCES) \ + X(ERROR_TOO_MANY_NAMES, ENOMEM) \ + X(ERROR_TOO_MANY_OPEN_FILES, EMFILE) \ + X(ERROR_UNEXP_NET_ERR, ECONNABORTED) \ + X(ERROR_WAIT_NO_CHILDREN, ECHILD) \ + X(ERROR_WORKING_SET_QUOTA, ENOMEM) \ + X(ERROR_WRITE_PROTECT, EACCES) \ + X(ERROR_WRONG_DISK, EACCES) \ + X(WSAEACCES, EACCES) \ + X(WSAEADDRINUSE, EADDRINUSE) \ + X(WSAEADDRNOTAVAIL, EADDRNOTAVAIL) \ + X(WSAEAFNOSUPPORT, EAFNOSUPPORT) \ + X(WSAECONNABORTED, ECONNABORTED) \ + X(WSAECONNREFUSED, ECONNREFUSED) \ + X(WSAECONNRESET, ECONNRESET) \ + X(WSAEDISCON, EPIPE) \ + X(WSAEFAULT, EFAULT) \ + X(WSAEHOSTDOWN, EHOSTUNREACH) \ + X(WSAEHOSTUNREACH, EHOSTUNREACH) \ + X(WSAEINPROGRESS, EBUSY) \ + X(WSAEINTR, EINTR) \ + X(WSAEINVAL, EINVAL) \ + X(WSAEISCONN, EISCONN) \ + X(WSAEMSGSIZE, EMSGSIZE) \ + X(WSAENETDOWN, ENETDOWN) \ + X(WSAENETRESET, EHOSTUNREACH) \ + X(WSAENETUNREACH, ENETUNREACH) \ + X(WSAENOBUFS, ENOMEM) \ + X(WSAENOTCONN, ENOTCONN) \ + X(WSAENOTSOCK, ENOTSOCK) \ + X(WSAEOPNOTSUPP, EOPNOTSUPP) \ + X(WSAEPROCLIM, ENOMEM) \ + X(WSAESHUTDOWN, EPIPE) \ + X(WSAETIMEDOUT, ETIMEDOUT) \ + X(WSAEWOULDBLOCK, EWOULDBLOCK) \ + X(WSANOTINITIALISED, ENETDOWN) \ + X(WSASYSNOTREADY, ENETDOWN) \ + X(WSAVERNOTSUPPORTED, ENOSYS) + +static errno_t err__map_win_error_to_errno(DWORD error) { + switch (error) { +#define X(error_sym, errno_sym) \ + case error_sym: \ + return errno_sym; + ERR__ERRNO_MAPPINGS(X) +#undef X + } + return EINVAL; +} + +void err_map_win_error(void) { + errno = err__map_win_error_to_errno(GetLastError()); +} + +void err_set_win_error(DWORD error) { + SetLastError(error); + errno = err__map_win_error_to_errno(error); +} + +int err_check_handle(HANDLE handle) { + DWORD flags; + + /* GetHandleInformation() succeeds when passed INVALID_HANDLE_VALUE, so check + * for this condition explicitly. */ + if (handle == INVALID_HANDLE_VALUE) + return_set_error(-1, ERROR_INVALID_HANDLE); + + if (!GetHandleInformation(handle, &flags)) + return_map_error(-1); + + return 0; +} + +#include + +#define array_count(a) (sizeof(a) / (sizeof((a)[0]))) + +#define container_of(ptr, type, member) \ + ((type*) ((uintptr_t) (ptr) - offsetof(type, member))) + +#define unused_var(v) ((void) (v)) + +/* Polyfill `inline` for older versions of msvc (up to Visual Studio 2013) */ +#if defined(_MSC_VER) && _MSC_VER < 1900 +#define inline __inline +#endif + +WEPOLL_INTERNAL int ws_global_init(void); +WEPOLL_INTERNAL SOCKET ws_get_base_socket(SOCKET socket); + +static bool init__done = false; +static INIT_ONCE init__once = INIT_ONCE_STATIC_INIT; + +static BOOL CALLBACK init__once_callback(INIT_ONCE* once, + void* parameter, + void** context) { + unused_var(once); + unused_var(parameter); + unused_var(context); + + /* N.b. that initialization order matters here. */ + if (ws_global_init() < 0 || nt_global_init() < 0 || + reflock_global_init() < 0 || epoll_global_init() < 0) + return FALSE; + + init__done = true; + return TRUE; +} + +int init(void) { + if (!init__done && + !InitOnceExecuteOnce(&init__once, init__once_callback, NULL, NULL)) + /* `InitOnceExecuteOnce()` itself is infallible, and it doesn't set any + * error code when the once-callback returns FALSE. We return -1 here to + * indicate that global initialization failed; the failing init function is + * resposible for setting `errno` and calling `SetLastError()`. */ + return -1; + + return 0; +} + +/* Set up a workaround for the following problem: + * FARPROC addr = GetProcAddress(...); + * MY_FUNC func = (MY_FUNC) addr; <-- GCC 8 warning/error. + * MY_FUNC func = (MY_FUNC) (void*) addr; <-- MSVC warning/error. + * To compile cleanly with either compiler, do casts with this "bridge" type: + * MY_FUNC func = (MY_FUNC) (nt__fn_ptr_cast_t) addr; */ +#ifdef __GNUC__ +typedef void* nt__fn_ptr_cast_t; +#else +typedef FARPROC nt__fn_ptr_cast_t; +#endif + +#define X(return_type, attributes, name, parameters) \ + WEPOLL_INTERNAL return_type(attributes* name) parameters = NULL; +NT_NTDLL_IMPORT_LIST(X) +#undef X + +int nt_global_init(void) { + HMODULE ntdll; + FARPROC fn_ptr; + + ntdll = GetModuleHandleW(L"ntdll.dll"); + if (ntdll == NULL) + return -1; + +#define X(return_type, attributes, name, parameters) \ + fn_ptr = GetProcAddress(ntdll, #name); \ + if (fn_ptr == NULL) \ + return -1; \ + name = (return_type(attributes*) parameters)(nt__fn_ptr_cast_t) fn_ptr; + NT_NTDLL_IMPORT_LIST(X) +#undef X + + return 0; +} + +#include + +typedef struct poll_group poll_group_t; + +typedef struct queue_node queue_node_t; + +WEPOLL_INTERNAL poll_group_t* poll_group_acquire(port_state_t* port); +WEPOLL_INTERNAL void poll_group_release(poll_group_t* poll_group); + +WEPOLL_INTERNAL void poll_group_delete(poll_group_t* poll_group); + +WEPOLL_INTERNAL poll_group_t* poll_group_from_queue_node( + queue_node_t* queue_node); +WEPOLL_INTERNAL HANDLE + poll_group_get_afd_device_handle(poll_group_t* poll_group); + +typedef struct queue_node { + queue_node_t* prev; + queue_node_t* next; +} queue_node_t; + +typedef struct queue { + queue_node_t head; +} queue_t; + +WEPOLL_INTERNAL void queue_init(queue_t* queue); +WEPOLL_INTERNAL void queue_node_init(queue_node_t* node); + +WEPOLL_INTERNAL queue_node_t* queue_first(const queue_t* queue); +WEPOLL_INTERNAL queue_node_t* queue_last(const queue_t* queue); + +WEPOLL_INTERNAL void queue_prepend(queue_t* queue, queue_node_t* node); +WEPOLL_INTERNAL void queue_append(queue_t* queue, queue_node_t* node); +WEPOLL_INTERNAL void queue_move_to_start(queue_t* queue, queue_node_t* node); +WEPOLL_INTERNAL void queue_move_to_end(queue_t* queue, queue_node_t* node); +WEPOLL_INTERNAL void queue_remove(queue_node_t* node); + +WEPOLL_INTERNAL bool queue_is_empty(const queue_t* queue); +WEPOLL_INTERNAL bool queue_is_enqueued(const queue_node_t* node); + +#define POLL_GROUP__MAX_GROUP_SIZE 32 + +typedef struct poll_group { + port_state_t* port_state; + queue_node_t queue_node; + HANDLE afd_device_handle; + size_t group_size; +} poll_group_t; + +static poll_group_t* poll_group__new(port_state_t* port_state) { + HANDLE iocp_handle = port_get_iocp_handle(port_state); + queue_t* poll_group_queue = port_get_poll_group_queue(port_state); + + poll_group_t* poll_group = malloc(sizeof *poll_group); + if (poll_group == NULL) + return_set_error(NULL, ERROR_NOT_ENOUGH_MEMORY); + + memset(poll_group, 0, sizeof *poll_group); + + queue_node_init(&poll_group->queue_node); + poll_group->port_state = port_state; + + if (afd_create_device_handle(iocp_handle, &poll_group->afd_device_handle) < + 0) { + free(poll_group); + return NULL; + } + + queue_append(poll_group_queue, &poll_group->queue_node); + + return poll_group; +} + +void poll_group_delete(poll_group_t* poll_group) { + assert(poll_group->group_size == 0); + CloseHandle(poll_group->afd_device_handle); + queue_remove(&poll_group->queue_node); + free(poll_group); +} + +poll_group_t* poll_group_from_queue_node(queue_node_t* queue_node) { + return container_of(queue_node, poll_group_t, queue_node); +} + +HANDLE poll_group_get_afd_device_handle(poll_group_t* poll_group) { + return poll_group->afd_device_handle; +} + +poll_group_t* poll_group_acquire(port_state_t* port_state) { + queue_t* poll_group_queue = port_get_poll_group_queue(port_state); + poll_group_t* poll_group = + !queue_is_empty(poll_group_queue) + ? container_of( + queue_last(poll_group_queue), poll_group_t, queue_node) + : NULL; + + if (poll_group == NULL || + poll_group->group_size >= POLL_GROUP__MAX_GROUP_SIZE) + poll_group = poll_group__new(port_state); + if (poll_group == NULL) + return NULL; + + if (++poll_group->group_size == POLL_GROUP__MAX_GROUP_SIZE) + queue_move_to_start(poll_group_queue, &poll_group->queue_node); + + return poll_group; +} + +void poll_group_release(poll_group_t* poll_group) { + port_state_t* port_state = poll_group->port_state; + queue_t* poll_group_queue = port_get_poll_group_queue(port_state); + + poll_group->group_size--; + assert(poll_group->group_size < POLL_GROUP__MAX_GROUP_SIZE); + + queue_move_to_end(poll_group_queue, &poll_group->queue_node); + + /* Poll groups are currently only freed when the epoll port is closed. */ +} + +WEPOLL_INTERNAL sock_state_t* sock_new(port_state_t* port_state, + SOCKET socket); +WEPOLL_INTERNAL void sock_delete(port_state_t* port_state, + sock_state_t* sock_state); +WEPOLL_INTERNAL void sock_force_delete(port_state_t* port_state, + sock_state_t* sock_state); + +WEPOLL_INTERNAL int sock_set_event(port_state_t* port_state, + sock_state_t* sock_state, + const struct epoll_event* ev); + +WEPOLL_INTERNAL int sock_update(port_state_t* port_state, + sock_state_t* sock_state); +WEPOLL_INTERNAL int sock_feed_event(port_state_t* port_state, + IO_STATUS_BLOCK* io_status_block, + struct epoll_event* ev); + +WEPOLL_INTERNAL sock_state_t* sock_state_from_queue_node( + queue_node_t* queue_node); +WEPOLL_INTERNAL queue_node_t* sock_state_to_queue_node( + sock_state_t* sock_state); +WEPOLL_INTERNAL sock_state_t* sock_state_from_tree_node( + tree_node_t* tree_node); +WEPOLL_INTERNAL tree_node_t* sock_state_to_tree_node(sock_state_t* sock_state); + +#define PORT__MAX_ON_STACK_COMPLETIONS 256 + +typedef struct port_state { + HANDLE iocp_handle; + tree_t sock_tree; + queue_t sock_update_queue; + queue_t sock_deleted_queue; + queue_t poll_group_queue; + ts_tree_node_t handle_tree_node; + CRITICAL_SECTION lock; + size_t active_poll_count; +} port_state_t; + +static inline port_state_t* port__alloc(void) { + port_state_t* port_state = malloc(sizeof *port_state); + if (port_state == NULL) + return_set_error(NULL, ERROR_NOT_ENOUGH_MEMORY); + + return port_state; +} + +static inline void port__free(port_state_t* port) { + assert(port != NULL); + free(port); +} + +static inline HANDLE port__create_iocp(void) { + HANDLE iocp_handle = + CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0); + if (iocp_handle == NULL) + return_map_error(NULL); + + return iocp_handle; +} + +port_state_t* port_new(HANDLE* iocp_handle_out) { + port_state_t* port_state; + HANDLE iocp_handle; + + port_state = port__alloc(); + if (port_state == NULL) + goto err1; + + iocp_handle = port__create_iocp(); + if (iocp_handle == NULL) + goto err2; + + memset(port_state, 0, sizeof *port_state); + + port_state->iocp_handle = iocp_handle; + tree_init(&port_state->sock_tree); + queue_init(&port_state->sock_update_queue); + queue_init(&port_state->sock_deleted_queue); + queue_init(&port_state->poll_group_queue); + ts_tree_node_init(&port_state->handle_tree_node); + InitializeCriticalSection(&port_state->lock); + + *iocp_handle_out = iocp_handle; + return port_state; + +err2: + port__free(port_state); +err1: + return NULL; +} + +static inline int port__close_iocp(port_state_t* port_state) { + HANDLE iocp_handle = port_state->iocp_handle; + port_state->iocp_handle = NULL; + + if (!CloseHandle(iocp_handle)) + return_map_error(-1); + + return 0; +} + +int port_close(port_state_t* port_state) { + int result; + + EnterCriticalSection(&port_state->lock); + result = port__close_iocp(port_state); + LeaveCriticalSection(&port_state->lock); + + return result; +} + +int port_delete(port_state_t* port_state) { + tree_node_t* tree_node; + queue_node_t* queue_node; + + /* At this point the IOCP port should have been closed. */ + assert(port_state->iocp_handle == NULL); + + while ((tree_node = tree_root(&port_state->sock_tree)) != NULL) { + sock_state_t* sock_state = sock_state_from_tree_node(tree_node); + sock_force_delete(port_state, sock_state); + } + + while ((queue_node = queue_first(&port_state->sock_deleted_queue)) != NULL) { + sock_state_t* sock_state = sock_state_from_queue_node(queue_node); + sock_force_delete(port_state, sock_state); + } + + while ((queue_node = queue_first(&port_state->poll_group_queue)) != NULL) { + poll_group_t* poll_group = poll_group_from_queue_node(queue_node); + poll_group_delete(poll_group); + } + + assert(queue_is_empty(&port_state->sock_update_queue)); + + DeleteCriticalSection(&port_state->lock); + + port__free(port_state); + + return 0; +} + +static int port__update_events(port_state_t* port_state) { + queue_t* sock_update_queue = &port_state->sock_update_queue; + + /* Walk the queue, submitting new poll requests for every socket that needs + * it. */ + while (!queue_is_empty(sock_update_queue)) { + queue_node_t* queue_node = queue_first(sock_update_queue); + sock_state_t* sock_state = sock_state_from_queue_node(queue_node); + + if (sock_update(port_state, sock_state) < 0) + return -1; + + /* sock_update() removes the socket from the update queue. */ + } + + return 0; +} + +static inline void port__update_events_if_polling(port_state_t* port_state) { + if (port_state->active_poll_count > 0) + port__update_events(port_state); +} + +static inline int port__feed_events(port_state_t* port_state, + struct epoll_event* epoll_events, + OVERLAPPED_ENTRY* iocp_events, + DWORD iocp_event_count) { + int epoll_event_count = 0; + DWORD i; + + for (i = 0; i < iocp_event_count; i++) { + IO_STATUS_BLOCK* io_status_block = + (IO_STATUS_BLOCK*) iocp_events[i].lpOverlapped; + struct epoll_event* ev = &epoll_events[epoll_event_count]; + + epoll_event_count += sock_feed_event(port_state, io_status_block, ev); + } + + return epoll_event_count; +} + +static inline int port__poll(port_state_t* port_state, + struct epoll_event* epoll_events, + OVERLAPPED_ENTRY* iocp_events, + DWORD maxevents, + DWORD timeout) { + DWORD completion_count; + + if (port__update_events(port_state) < 0) + return -1; + + port_state->active_poll_count++; + + LeaveCriticalSection(&port_state->lock); + + BOOL r = GetQueuedCompletionStatusEx(port_state->iocp_handle, + iocp_events, + maxevents, + &completion_count, + timeout, + FALSE); + + EnterCriticalSection(&port_state->lock); + + port_state->active_poll_count--; + + if (!r) + return_map_error(-1); + + return port__feed_events( + port_state, epoll_events, iocp_events, completion_count); +} + +int port_wait(port_state_t* port_state, + struct epoll_event* events, + int maxevents, + int timeout) { + OVERLAPPED_ENTRY stack_iocp_events[PORT__MAX_ON_STACK_COMPLETIONS]; + OVERLAPPED_ENTRY* iocp_events; + uint64_t due = 0; + DWORD gqcs_timeout; + int result; + + /* Check whether `maxevents` is in range. */ + if (maxevents <= 0) + return_set_error(-1, ERROR_INVALID_PARAMETER); + + /* Decide whether the IOCP completion list can live on the stack, or allocate + * memory for it on the heap. */ + if ((size_t) maxevents <= array_count(stack_iocp_events)) { + iocp_events = stack_iocp_events; + } else if ((iocp_events = + malloc((size_t) maxevents * sizeof *iocp_events)) == NULL) { + iocp_events = stack_iocp_events; + maxevents = array_count(stack_iocp_events); + } + + /* Compute the timeout for GetQueuedCompletionStatus, and the wait end + * time, if the user specified a timeout other than zero or infinite. */ + if (timeout > 0) { + due = GetTickCount64() + (uint64_t) timeout; + gqcs_timeout = (DWORD) timeout; + } else if (timeout == 0) { + gqcs_timeout = 0; + } else { + gqcs_timeout = INFINITE; + } + + EnterCriticalSection(&port_state->lock); + + /* Dequeue completion packets until either at least one interesting event + * has been discovered, or the timeout is reached. */ + for (;;) { + uint64_t now; + + result = port__poll( + port_state, events, iocp_events, (DWORD) maxevents, gqcs_timeout); + if (result < 0 || result > 0) + break; /* Result, error, or time-out. */ + + if (timeout < 0) + continue; /* When timeout is negative, never time out. */ + + /* Update time. */ + now = GetTickCount64(); + + /* Do not allow the due time to be in the past. */ + if (now >= due) { + SetLastError(WAIT_TIMEOUT); + break; + } + + /* Recompute time-out argument for GetQueuedCompletionStatus. */ + gqcs_timeout = (DWORD)(due - now); + } + + port__update_events_if_polling(port_state); + + LeaveCriticalSection(&port_state->lock); + + if (iocp_events != stack_iocp_events) + free(iocp_events); + + if (result >= 0) + return result; + else if (GetLastError() == WAIT_TIMEOUT) + return 0; + else + return -1; +} + +static inline int port__ctl_add(port_state_t* port_state, + SOCKET sock, + struct epoll_event* ev) { + sock_state_t* sock_state = sock_new(port_state, sock); + if (sock_state == NULL) + return -1; + + if (sock_set_event(port_state, sock_state, ev) < 0) { + sock_delete(port_state, sock_state); + return -1; + } + + port__update_events_if_polling(port_state); + + return 0; +} + +static inline int port__ctl_mod(port_state_t* port_state, + SOCKET sock, + struct epoll_event* ev) { + sock_state_t* sock_state = port_find_socket(port_state, sock); + if (sock_state == NULL) + return -1; + + if (sock_set_event(port_state, sock_state, ev) < 0) + return -1; + + port__update_events_if_polling(port_state); + + return 0; +} + +static inline int port__ctl_del(port_state_t* port_state, SOCKET sock) { + sock_state_t* sock_state = port_find_socket(port_state, sock); + if (sock_state == NULL) + return -1; + + sock_delete(port_state, sock_state); + + return 0; +} + +static inline int port__ctl_op(port_state_t* port_state, + int op, + SOCKET sock, + struct epoll_event* ev) { + switch (op) { + case EPOLL_CTL_ADD: + return port__ctl_add(port_state, sock, ev); + case EPOLL_CTL_MOD: + return port__ctl_mod(port_state, sock, ev); + case EPOLL_CTL_DEL: + return port__ctl_del(port_state, sock); + default: + return_set_error(-1, ERROR_INVALID_PARAMETER); + } +} + +int port_ctl(port_state_t* port_state, + int op, + SOCKET sock, + struct epoll_event* ev) { + int result; + + EnterCriticalSection(&port_state->lock); + result = port__ctl_op(port_state, op, sock, ev); + LeaveCriticalSection(&port_state->lock); + + return result; +} + +int port_register_socket(port_state_t* port_state, + sock_state_t* sock_state, + SOCKET socket) { + if (tree_add(&port_state->sock_tree, + sock_state_to_tree_node(sock_state), + socket) < 0) + return_set_error(-1, ERROR_ALREADY_EXISTS); + return 0; +} + +void port_unregister_socket(port_state_t* port_state, + sock_state_t* sock_state) { + tree_del(&port_state->sock_tree, sock_state_to_tree_node(sock_state)); +} + +sock_state_t* port_find_socket(port_state_t* port_state, SOCKET socket) { + tree_node_t* tree_node = tree_find(&port_state->sock_tree, socket); + if (tree_node == NULL) + return_set_error(NULL, ERROR_NOT_FOUND); + return sock_state_from_tree_node(tree_node); +} + +void port_request_socket_update(port_state_t* port_state, + sock_state_t* sock_state) { + if (queue_is_enqueued(sock_state_to_queue_node(sock_state))) + return; + queue_append(&port_state->sock_update_queue, + sock_state_to_queue_node(sock_state)); +} + +void port_cancel_socket_update(port_state_t* port_state, + sock_state_t* sock_state) { + unused_var(port_state); + if (!queue_is_enqueued(sock_state_to_queue_node(sock_state))) + return; + queue_remove(sock_state_to_queue_node(sock_state)); +} + +void port_add_deleted_socket(port_state_t* port_state, + sock_state_t* sock_state) { + if (queue_is_enqueued(sock_state_to_queue_node(sock_state))) + return; + queue_append(&port_state->sock_deleted_queue, + sock_state_to_queue_node(sock_state)); +} + +void port_remove_deleted_socket(port_state_t* port_state, + sock_state_t* sock_state) { + unused_var(port_state); + if (!queue_is_enqueued(sock_state_to_queue_node(sock_state))) + return; + queue_remove(sock_state_to_queue_node(sock_state)); +} + +HANDLE port_get_iocp_handle(port_state_t* port_state) { + assert(port_state->iocp_handle != NULL); + return port_state->iocp_handle; +} + +queue_t* port_get_poll_group_queue(port_state_t* port_state) { + return &port_state->poll_group_queue; +} + +port_state_t* port_state_from_handle_tree_node(ts_tree_node_t* tree_node) { + return container_of(tree_node, port_state_t, handle_tree_node); +} + +ts_tree_node_t* port_state_to_handle_tree_node(port_state_t* port_state) { + return &port_state->handle_tree_node; +} + +void queue_init(queue_t* queue) { + queue_node_init(&queue->head); +} + +void queue_node_init(queue_node_t* node) { + node->prev = node; + node->next = node; +} + +static inline void queue__detach_node(queue_node_t* node) { + node->prev->next = node->next; + node->next->prev = node->prev; +} + +queue_node_t* queue_first(const queue_t* queue) { + return !queue_is_empty(queue) ? queue->head.next : NULL; +} + +queue_node_t* queue_last(const queue_t* queue) { + return !queue_is_empty(queue) ? queue->head.prev : NULL; +} + +void queue_prepend(queue_t* queue, queue_node_t* node) { + node->next = queue->head.next; + node->prev = &queue->head; + node->next->prev = node; + queue->head.next = node; +} + +void queue_append(queue_t* queue, queue_node_t* node) { + node->next = &queue->head; + node->prev = queue->head.prev; + node->prev->next = node; + queue->head.prev = node; +} + +void queue_move_to_start(queue_t* queue, queue_node_t* node) { + queue__detach_node(node); + queue_prepend(queue, node); +} + +void queue_move_to_end(queue_t* queue, queue_node_t* node) { + queue__detach_node(node); + queue_append(queue, node); +} + +void queue_remove(queue_node_t* node) { + queue__detach_node(node); + queue_node_init(node); +} + +bool queue_is_empty(const queue_t* queue) { + return !queue_is_enqueued(&queue->head); +} + +bool queue_is_enqueued(const queue_node_t* node) { + return node->prev != node; +} + +#define REFLOCK__REF ((long) 0x00000001UL) +#define REFLOCK__REF_MASK ((long) 0x0fffffffUL) +#define REFLOCK__DESTROY ((long) 0x10000000UL) +#define REFLOCK__DESTROY_MASK ((long) 0xf0000000UL) +#define REFLOCK__POISON ((long) 0x300dead0UL) + +static HANDLE reflock__keyed_event = NULL; + +int reflock_global_init(void) { + NTSTATUS status = NtCreateKeyedEvent( + &reflock__keyed_event, KEYEDEVENT_ALL_ACCESS, NULL, 0); + if (status != STATUS_SUCCESS) + return_set_error(-1, RtlNtStatusToDosError(status)); + return 0; +} + +void reflock_init(reflock_t* reflock) { + reflock->state = 0; +} + +static void reflock__signal_event(void* address) { + NTSTATUS status = + NtReleaseKeyedEvent(reflock__keyed_event, address, FALSE, NULL); + if (status != STATUS_SUCCESS) + abort(); +} + +static void reflock__await_event(void* address) { + NTSTATUS status = + NtWaitForKeyedEvent(reflock__keyed_event, address, FALSE, NULL); + if (status != STATUS_SUCCESS) + abort(); +} + +void reflock_ref(reflock_t* reflock) { + long state = InterlockedAdd(&reflock->state, REFLOCK__REF); + + /* Verify that the counter didn't overflow and the lock isn't destroyed. */ + assert((state & REFLOCK__DESTROY_MASK) == 0); + unused_var(state); +} + +void reflock_unref(reflock_t* reflock) { + long state = InterlockedAdd(&reflock->state, -REFLOCK__REF); + + /* Verify that the lock was referenced and not already destroyed. */ + assert((state & REFLOCK__DESTROY_MASK & ~REFLOCK__DESTROY) == 0); + + if (state == REFLOCK__DESTROY) + reflock__signal_event(reflock); +} + +void reflock_unref_and_destroy(reflock_t* reflock) { + long state = + InterlockedAdd(&reflock->state, REFLOCK__DESTROY - REFLOCK__REF); + long ref_count = state & REFLOCK__REF_MASK; + + /* Verify that the lock was referenced and not already destroyed. */ + assert((state & REFLOCK__DESTROY_MASK) == REFLOCK__DESTROY); + + if (ref_count != 0) + reflock__await_event(reflock); + + state = InterlockedExchange(&reflock->state, REFLOCK__POISON); + assert(state == REFLOCK__DESTROY); +} + +#define SOCK__KNOWN_EPOLL_EVENTS \ + (EPOLLIN | EPOLLPRI | EPOLLOUT | EPOLLERR | EPOLLHUP | EPOLLRDNORM | \ + EPOLLRDBAND | EPOLLWRNORM | EPOLLWRBAND | EPOLLMSG | EPOLLRDHUP) + +typedef enum sock__poll_status { + SOCK__POLL_IDLE = 0, + SOCK__POLL_PENDING, + SOCK__POLL_CANCELLED +} sock__poll_status_t; + +typedef struct sock_state { + IO_STATUS_BLOCK io_status_block; + AFD_POLL_INFO poll_info; + queue_node_t queue_node; + tree_node_t tree_node; + poll_group_t* poll_group; + SOCKET base_socket; + epoll_data_t user_data; + uint32_t user_events; + uint32_t pending_events; + sock__poll_status_t poll_status; + bool delete_pending; +} sock_state_t; + +static inline sock_state_t* sock__alloc(void) { + sock_state_t* sock_state = malloc(sizeof *sock_state); + if (sock_state == NULL) + return_set_error(NULL, ERROR_NOT_ENOUGH_MEMORY); + return sock_state; +} + +static inline void sock__free(sock_state_t* sock_state) { + assert(sock_state != NULL); + free(sock_state); +} + +static inline int sock__cancel_poll(sock_state_t* sock_state) { + assert(sock_state->poll_status == SOCK__POLL_PENDING); + + if (afd_cancel_poll(poll_group_get_afd_device_handle(sock_state->poll_group), + &sock_state->io_status_block) < 0) + return -1; + + sock_state->poll_status = SOCK__POLL_CANCELLED; + sock_state->pending_events = 0; + return 0; +} + +sock_state_t* sock_new(port_state_t* port_state, SOCKET socket) { + SOCKET base_socket; + poll_group_t* poll_group; + sock_state_t* sock_state; + + if (socket == 0 || socket == INVALID_SOCKET) + return_set_error(NULL, ERROR_INVALID_HANDLE); + + base_socket = ws_get_base_socket(socket); + if (base_socket == INVALID_SOCKET) + return NULL; + + poll_group = poll_group_acquire(port_state); + if (poll_group == NULL) + return NULL; + + sock_state = sock__alloc(); + if (sock_state == NULL) + goto err1; + + memset(sock_state, 0, sizeof *sock_state); + + sock_state->base_socket = base_socket; + sock_state->poll_group = poll_group; + + tree_node_init(&sock_state->tree_node); + queue_node_init(&sock_state->queue_node); + + if (port_register_socket(port_state, sock_state, socket) < 0) + goto err2; + + return sock_state; + +err2: + sock__free(sock_state); +err1: + poll_group_release(poll_group); + + return NULL; +} + +static int sock__delete(port_state_t* port_state, + sock_state_t* sock_state, + bool force) { + if (!sock_state->delete_pending) { + if (sock_state->poll_status == SOCK__POLL_PENDING) + sock__cancel_poll(sock_state); + + port_cancel_socket_update(port_state, sock_state); + port_unregister_socket(port_state, sock_state); + + sock_state->delete_pending = true; + } + + /* If the poll request still needs to complete, the sock_state object can't + * be free()d yet. `sock_feed_event()` or `port_close()` will take care + * of this later. */ + if (force || sock_state->poll_status == SOCK__POLL_IDLE) { + /* Free the sock_state now. */ + port_remove_deleted_socket(port_state, sock_state); + poll_group_release(sock_state->poll_group); + sock__free(sock_state); + } else { + /* Free the socket later. */ + port_add_deleted_socket(port_state, sock_state); + } + + return 0; +} + +void sock_delete(port_state_t* port_state, sock_state_t* sock_state) { + sock__delete(port_state, sock_state, false); +} + +void sock_force_delete(port_state_t* port_state, sock_state_t* sock_state) { + sock__delete(port_state, sock_state, true); +} + +int sock_set_event(port_state_t* port_state, + sock_state_t* sock_state, + const struct epoll_event* ev) { + /* EPOLLERR and EPOLLHUP are always reported, even when not requested by the + * caller. However they are disabled after a event has been reported for a + * socket for which the EPOLLONESHOT flag was set. */ + uint32_t events = ev->events | EPOLLERR | EPOLLHUP; + + sock_state->user_events = events; + sock_state->user_data = ev->data; + + if ((events & SOCK__KNOWN_EPOLL_EVENTS & ~sock_state->pending_events) != 0) + port_request_socket_update(port_state, sock_state); + + return 0; +} + +static inline DWORD sock__epoll_events_to_afd_events(uint32_t epoll_events) { + /* Always monitor for AFD_POLL_LOCAL_CLOSE, which is triggered when the + * socket is closed with closesocket() or CloseHandle(). */ + DWORD afd_events = AFD_POLL_LOCAL_CLOSE; + + if (epoll_events & (EPOLLIN | EPOLLRDNORM)) + afd_events |= AFD_POLL_RECEIVE | AFD_POLL_ACCEPT; + if (epoll_events & (EPOLLPRI | EPOLLRDBAND)) + afd_events |= AFD_POLL_RECEIVE_EXPEDITED; + if (epoll_events & (EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND)) + afd_events |= AFD_POLL_SEND; + if (epoll_events & (EPOLLIN | EPOLLRDNORM | EPOLLRDHUP)) + afd_events |= AFD_POLL_DISCONNECT; + if (epoll_events & EPOLLHUP) + afd_events |= AFD_POLL_ABORT; + if (epoll_events & EPOLLERR) + afd_events |= AFD_POLL_CONNECT_FAIL; + + return afd_events; +} + +static inline uint32_t sock__afd_events_to_epoll_events(DWORD afd_events) { + uint32_t epoll_events = 0; + + if (afd_events & (AFD_POLL_RECEIVE | AFD_POLL_ACCEPT)) + epoll_events |= EPOLLIN | EPOLLRDNORM; + if (afd_events & AFD_POLL_RECEIVE_EXPEDITED) + epoll_events |= EPOLLPRI | EPOLLRDBAND; + if (afd_events & AFD_POLL_SEND) + epoll_events |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; + if (afd_events & AFD_POLL_DISCONNECT) + epoll_events |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; + if (afd_events & AFD_POLL_ABORT) + epoll_events |= EPOLLHUP; + if (afd_events & AFD_POLL_CONNECT_FAIL) + /* Linux reports all these events after connect() has failed. */ + epoll_events |= + EPOLLIN | EPOLLOUT | EPOLLERR | EPOLLRDNORM | EPOLLWRNORM | EPOLLRDHUP; + + return epoll_events; +} + +int sock_update(port_state_t* port_state, sock_state_t* sock_state) { + assert(!sock_state->delete_pending); + + if ((sock_state->poll_status == SOCK__POLL_PENDING) && + (sock_state->user_events & SOCK__KNOWN_EPOLL_EVENTS & + ~sock_state->pending_events) == 0) { + /* All the events the user is interested in are already being monitored by + * the pending poll operation. It might spuriously complete because of an + * event that we're no longer interested in; when that happens we'll submit + * a new poll operation with the updated event mask. */ + + } else if (sock_state->poll_status == SOCK__POLL_PENDING) { + /* A poll operation is already pending, but it's not monitoring for all the + * events that the user is interested in. Therefore, cancel the pending + * poll operation; when we receive it's completion package, a new poll + * operation will be submitted with the correct event mask. */ + if (sock__cancel_poll(sock_state) < 0) + return -1; + + } else if (sock_state->poll_status == SOCK__POLL_CANCELLED) { + /* The poll operation has already been cancelled, we're still waiting for + * it to return. For now, there's nothing that needs to be done. */ + + } else if (sock_state->poll_status == SOCK__POLL_IDLE) { + /* No poll operation is pending; start one. */ + sock_state->poll_info.Exclusive = FALSE; + sock_state->poll_info.NumberOfHandles = 1; + sock_state->poll_info.Timeout.QuadPart = INT64_MAX; + sock_state->poll_info.Handles[0].Handle = (HANDLE) sock_state->base_socket; + sock_state->poll_info.Handles[0].Status = 0; + sock_state->poll_info.Handles[0].Events = + sock__epoll_events_to_afd_events(sock_state->user_events); + + if (afd_poll(poll_group_get_afd_device_handle(sock_state->poll_group), + &sock_state->poll_info, + &sock_state->io_status_block) < 0) { + switch (GetLastError()) { + case ERROR_IO_PENDING: + /* Overlapped poll operation in progress; this is expected. */ + break; + case ERROR_INVALID_HANDLE: + /* Socket closed; it'll be dropped from the epoll set. */ + return sock__delete(port_state, sock_state, false); + default: + /* Other errors are propagated to the caller. */ + return_map_error(-1); + } + } + + /* The poll request was successfully submitted. */ + sock_state->poll_status = SOCK__POLL_PENDING; + sock_state->pending_events = sock_state->user_events; + + } else { + /* Unreachable. */ + assert(false); + } + + port_cancel_socket_update(port_state, sock_state); + return 0; +} + +int sock_feed_event(port_state_t* port_state, + IO_STATUS_BLOCK* io_status_block, + struct epoll_event* ev) { + sock_state_t* sock_state = + container_of(io_status_block, sock_state_t, io_status_block); + AFD_POLL_INFO* poll_info = &sock_state->poll_info; + uint32_t epoll_events = 0; + + sock_state->poll_status = SOCK__POLL_IDLE; + sock_state->pending_events = 0; + + if (sock_state->delete_pending) { + /* Socket has been deleted earlier and can now be freed. */ + return sock__delete(port_state, sock_state, false); + + } else if (io_status_block->Status == STATUS_CANCELLED) { + /* The poll request was cancelled by CancelIoEx. */ + + } else if (!NT_SUCCESS(io_status_block->Status)) { + /* The overlapped request itself failed in an unexpected way. */ + epoll_events = EPOLLERR; + + } else if (poll_info->NumberOfHandles < 1) { + /* This poll operation succeeded but didn't report any socket events. */ + + } else if (poll_info->Handles[0].Events & AFD_POLL_LOCAL_CLOSE) { + /* The poll operation reported that the socket was closed. */ + return sock__delete(port_state, sock_state, false); + + } else { + /* Events related to our socket were reported. */ + epoll_events = + sock__afd_events_to_epoll_events(poll_info->Handles[0].Events); + } + + /* Requeue the socket so a new poll request will be submitted. */ + port_request_socket_update(port_state, sock_state); + + /* Filter out events that the user didn't ask for. */ + epoll_events &= sock_state->user_events; + + /* Return if there are no epoll events to report. */ + if (epoll_events == 0) + return 0; + + /* If the the socket has the EPOLLONESHOT flag set, unmonitor all events, + * even EPOLLERR and EPOLLHUP. But always keep looking for closed sockets. */ + if (sock_state->user_events & EPOLLONESHOT) + sock_state->user_events = 0; + + ev->data = sock_state->user_data; + ev->events = epoll_events; + return 1; +} + +sock_state_t* sock_state_from_queue_node(queue_node_t* queue_node) { + return container_of(queue_node, sock_state_t, queue_node); +} + +queue_node_t* sock_state_to_queue_node(sock_state_t* sock_state) { + return &sock_state->queue_node; +} + +sock_state_t* sock_state_from_tree_node(tree_node_t* tree_node) { + return container_of(tree_node, sock_state_t, tree_node); +} + +tree_node_t* sock_state_to_tree_node(sock_state_t* sock_state) { + return &sock_state->tree_node; +} + +void ts_tree_init(ts_tree_t* ts_tree) { + tree_init(&ts_tree->tree); + InitializeSRWLock(&ts_tree->lock); +} + +void ts_tree_node_init(ts_tree_node_t* node) { + tree_node_init(&node->tree_node); + reflock_init(&node->reflock); +} + +int ts_tree_add(ts_tree_t* ts_tree, ts_tree_node_t* node, uintptr_t key) { + int r; + + AcquireSRWLockExclusive(&ts_tree->lock); + r = tree_add(&ts_tree->tree, &node->tree_node, key); + ReleaseSRWLockExclusive(&ts_tree->lock); + + return r; +} + +static inline ts_tree_node_t* ts_tree__find_node(ts_tree_t* ts_tree, + uintptr_t key) { + tree_node_t* tree_node = tree_find(&ts_tree->tree, key); + if (tree_node == NULL) + return NULL; + + return container_of(tree_node, ts_tree_node_t, tree_node); +} + +ts_tree_node_t* ts_tree_del_and_ref(ts_tree_t* ts_tree, uintptr_t key) { + ts_tree_node_t* ts_tree_node; + + AcquireSRWLockExclusive(&ts_tree->lock); + + ts_tree_node = ts_tree__find_node(ts_tree, key); + if (ts_tree_node != NULL) { + tree_del(&ts_tree->tree, &ts_tree_node->tree_node); + reflock_ref(&ts_tree_node->reflock); + } + + ReleaseSRWLockExclusive(&ts_tree->lock); + + return ts_tree_node; +} + +ts_tree_node_t* ts_tree_find_and_ref(ts_tree_t* ts_tree, uintptr_t key) { + ts_tree_node_t* ts_tree_node; + + AcquireSRWLockShared(&ts_tree->lock); + + ts_tree_node = ts_tree__find_node(ts_tree, key); + if (ts_tree_node != NULL) + reflock_ref(&ts_tree_node->reflock); + + ReleaseSRWLockShared(&ts_tree->lock); + + return ts_tree_node; +} + +void ts_tree_node_unref(ts_tree_node_t* node) { + reflock_unref(&node->reflock); +} + +void ts_tree_node_unref_and_destroy(ts_tree_node_t* node) { + reflock_unref_and_destroy(&node->reflock); +} + +void tree_init(tree_t* tree) { + memset(tree, 0, sizeof *tree); +} + +void tree_node_init(tree_node_t* node) { + memset(node, 0, sizeof *node); +} + +#define TREE__ROTATE(cis, trans) \ + tree_node_t* p = node; \ + tree_node_t* q = node->trans; \ + tree_node_t* parent = p->parent; \ + \ + if (parent) { \ + if (parent->left == p) \ + parent->left = q; \ + else \ + parent->right = q; \ + } else { \ + tree->root = q; \ + } \ + \ + q->parent = parent; \ + p->parent = q; \ + p->trans = q->cis; \ + if (p->trans) \ + p->trans->parent = p; \ + q->cis = p; + +static inline void tree__rotate_left(tree_t* tree, tree_node_t* node) { + TREE__ROTATE(left, right) +} + +static inline void tree__rotate_right(tree_t* tree, tree_node_t* node) { + TREE__ROTATE(right, left) +} + +#define TREE__INSERT_OR_DESCEND(side) \ + if (parent->side) { \ + parent = parent->side; \ + } else { \ + parent->side = node; \ + break; \ + } + +#define TREE__REBALANCE_AFTER_INSERT(cis, trans) \ + tree_node_t* grandparent = parent->parent; \ + tree_node_t* uncle = grandparent->trans; \ + \ + if (uncle && uncle->red) { \ + parent->red = uncle->red = false; \ + grandparent->red = true; \ + node = grandparent; \ + } else { \ + if (node == parent->trans) { \ + tree__rotate_##cis(tree, parent); \ + node = parent; \ + parent = node->parent; \ + } \ + parent->red = false; \ + grandparent->red = true; \ + tree__rotate_##trans(tree, grandparent); \ + } + +int tree_add(tree_t* tree, tree_node_t* node, uintptr_t key) { + tree_node_t* parent; + + parent = tree->root; + if (parent) { + for (;;) { + if (key < parent->key) { + TREE__INSERT_OR_DESCEND(left) + } else if (key > parent->key) { + TREE__INSERT_OR_DESCEND(right) + } else { + return -1; + } + } + } else { + tree->root = node; + } + + node->key = key; + node->left = node->right = NULL; + node->parent = parent; + node->red = true; + + for (; parent && parent->red; parent = node->parent) { + if (parent == parent->parent->left) { + TREE__REBALANCE_AFTER_INSERT(left, right) + } else { + TREE__REBALANCE_AFTER_INSERT(right, left) + } + } + tree->root->red = false; + + return 0; +} + +#define TREE__REBALANCE_AFTER_REMOVE(cis, trans) \ + tree_node_t* sibling = parent->trans; \ + \ + if (sibling->red) { \ + sibling->red = false; \ + parent->red = true; \ + tree__rotate_##cis(tree, parent); \ + sibling = parent->trans; \ + } \ + if ((sibling->left && sibling->left->red) || \ + (sibling->right && sibling->right->red)) { \ + if (!sibling->trans || !sibling->trans->red) { \ + sibling->cis->red = false; \ + sibling->red = true; \ + tree__rotate_##trans(tree, sibling); \ + sibling = parent->trans; \ + } \ + sibling->red = parent->red; \ + parent->red = sibling->trans->red = false; \ + tree__rotate_##cis(tree, parent); \ + node = tree->root; \ + break; \ + } \ + sibling->red = true; + +void tree_del(tree_t* tree, tree_node_t* node) { + tree_node_t* parent = node->parent; + tree_node_t* left = node->left; + tree_node_t* right = node->right; + tree_node_t* next; + bool red; + + if (!left) { + next = right; + } else if (!right) { + next = left; + } else { + next = right; + while (next->left) + next = next->left; + } + + if (parent) { + if (parent->left == node) + parent->left = next; + else + parent->right = next; + } else { + tree->root = next; + } + + if (left && right) { + red = next->red; + next->red = node->red; + next->left = left; + left->parent = next; + if (next != right) { + parent = next->parent; + next->parent = node->parent; + node = next->right; + parent->left = node; + next->right = right; + right->parent = next; + } else { + next->parent = parent; + parent = next; + node = next->right; + } + } else { + red = node->red; + node = next; + } + + if (node) + node->parent = parent; + if (red) + return; + if (node && node->red) { + node->red = false; + return; + } + + do { + if (node == tree->root) + break; + if (node == parent->left) { + TREE__REBALANCE_AFTER_REMOVE(left, right) + } else { + TREE__REBALANCE_AFTER_REMOVE(right, left) + } + node = parent; + parent = parent->parent; + } while (!node->red); + + if (node) + node->red = false; +} + +tree_node_t* tree_find(const tree_t* tree, uintptr_t key) { + tree_node_t* node = tree->root; + while (node) { + if (key < node->key) + node = node->left; + else if (key > node->key) + node = node->right; + else + return node; + } + return NULL; +} + +tree_node_t* tree_root(const tree_t* tree) { + return tree->root; +} + +#ifndef SIO_BSP_HANDLE_POLL +#define SIO_BSP_HANDLE_POLL 0x4800001D +#endif + +#ifndef SIO_BASE_HANDLE +#define SIO_BASE_HANDLE 0x48000022 +#endif + +int ws_global_init(void) { + int r; + WSADATA wsa_data; + + r = WSAStartup(MAKEWORD(2, 2), &wsa_data); + if (r != 0) + return_set_error(-1, (DWORD) r); + + return 0; +} + +static inline SOCKET ws__ioctl_get_bsp_socket(SOCKET socket, DWORD ioctl) { + SOCKET bsp_socket; + DWORD bytes; + + if (WSAIoctl(socket, + ioctl, + NULL, + 0, + &bsp_socket, + sizeof bsp_socket, + &bytes, + NULL, + NULL) != SOCKET_ERROR) + return bsp_socket; + else + return INVALID_SOCKET; +} + +SOCKET ws_get_base_socket(SOCKET socket) { + SOCKET base_socket; + DWORD error; + + for (;;) { + base_socket = ws__ioctl_get_bsp_socket(socket, SIO_BASE_HANDLE); + if (base_socket != INVALID_SOCKET) + return base_socket; + + error = GetLastError(); + if (error == WSAENOTSOCK) + return_set_error(INVALID_SOCKET, error); + + /* Even though Microsoft documentation clearly states that LSPs should + * never intercept the `SIO_BASE_HANDLE` ioctl [1], Komodia based LSPs do + * so anyway, breaking it, with the apparent intention of preventing LSP + * bypass [2]. Fortunately they don't handle `SIO_BSP_HANDLE_POLL`, which + * will at least let us obtain the socket associated with the next winsock + * protocol chain entry. If this succeeds, loop around and call + * `SIO_BASE_HANDLE` again with the returned BSP socket, to make sure that + * we unwrap all layers and retrieve the actual base socket. + * [1] https://docs.microsoft.com/en-us/windows/win32/winsock/winsock-ioctls + * [2] https://www.komodia.com/newwiki/index.php?title=Komodia%27s_Redirector_bug_fixes#Version_2.2.2.6 + */ + base_socket = ws__ioctl_get_bsp_socket(socket, SIO_BSP_HANDLE_POLL); + if (base_socket != INVALID_SOCKET && base_socket != socket) + socket = base_socket; + else + return_set_error(INVALID_SOCKET, error); + } +} \ No newline at end of file diff --git a/deps/oblib/src/lib/wepoll/wepoll.h b/deps/oblib/src/lib/wepoll/wepoll.h new file mode 100644 index 000000000..d25f971d0 --- /dev/null +++ b/deps/oblib/src/lib/wepoll/wepoll.h @@ -0,0 +1,113 @@ +/* + * wepoll - epoll for Windows + * https://github.com/piscisaureus/wepoll + * + * Copyright 2012-2020, Bert Belder + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef WEPOLL_H_ +#define WEPOLL_H_ + +#ifndef WEPOLL_EXPORT +#define WEPOLL_EXPORT +#endif + +#include + +enum EPOLL_EVENTS { + EPOLLIN = (int) (1U << 0), + EPOLLPRI = (int) (1U << 1), + EPOLLOUT = (int) (1U << 2), + EPOLLERR = (int) (1U << 3), + EPOLLHUP = (int) (1U << 4), + EPOLLRDNORM = (int) (1U << 6), + EPOLLRDBAND = (int) (1U << 7), + EPOLLWRNORM = (int) (1U << 8), + EPOLLWRBAND = (int) (1U << 9), + EPOLLMSG = (int) (1U << 10), /* Never reported. */ + EPOLLRDHUP = (int) (1U << 13), + EPOLLONESHOT = (int) (1U << 31) +}; + +#define EPOLLIN (1U << 0) +#define EPOLLPRI (1U << 1) +#define EPOLLOUT (1U << 2) +#define EPOLLERR (1U << 3) +#define EPOLLHUP (1U << 4) +#define EPOLLRDNORM (1U << 6) +#define EPOLLRDBAND (1U << 7) +#define EPOLLWRNORM (1U << 8) +#define EPOLLWRBAND (1U << 9) +#define EPOLLMSG (1U << 10) +#define EPOLLRDHUP (1U << 13) +#define EPOLLONESHOT (1U << 31) + +#define EPOLL_CTL_ADD 1 +#define EPOLL_CTL_MOD 2 +#define EPOLL_CTL_DEL 3 + +typedef void* HANDLE; +typedef uintptr_t SOCKET; + +typedef union epoll_data { + void* ptr; + int fd; + uint32_t u32; + uint64_t u64; + SOCKET sock; /* Windows specific */ + HANDLE hnd; /* Windows specific */ +} epoll_data_t; + +struct epoll_event { + uint32_t events; /* Epoll events and flags */ + epoll_data_t data; /* User data variable */ +}; + +#ifdef __cplusplus +extern "C" { +#endif + +WEPOLL_EXPORT HANDLE epoll_create(int size); +WEPOLL_EXPORT HANDLE epoll_create1(int flags); + +WEPOLL_EXPORT int epoll_close(HANDLE ephnd); + +WEPOLL_EXPORT int epoll_ctl(HANDLE ephnd, + int op, + SOCKET sock, + struct epoll_event* event); + +WEPOLL_EXPORT int epoll_wait(HANDLE ephnd, + struct epoll_event* events, + int maxevents, + int timeout); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* WEPOLL_H_ */ \ No newline at end of file diff --git a/deps/oblib/src/rpc/obmysql/ob_sql_nio.cpp b/deps/oblib/src/rpc/obmysql/ob_sql_nio.cpp index 1542a4619..c9dc4b7de 100644 --- a/deps/oblib/src/rpc/obmysql/ob_sql_nio.cpp +++ b/deps/oblib/src/rpc/obmysql/ob_sql_nio.cpp @@ -223,8 +223,19 @@ static inline int accept4(int sockfd, struct sockaddr *addr, socklen_t *addrlen, // ============================================================ #include // WaitOnAddress / WakeByAddressSingle #include // AF_UNIX / sockaddr_un (Windows 10+) +#include // eventfd emulation table (legacy + wepoll paths) + +#ifdef OB_USE_WEPOLL +// ---- Real epoll via wepoll (AFD/IOCP) ---- +#include "lib/net/ob_epoll_compat.h" + +extern "C" int ob_win32_epoll_wait_impl(int epfd, struct epoll_event *events, + int maxevents, int timeout) { + // Compat-header macro redirects this to wepoll's epoll_wait via the registry. + return epoll_wait(epfd, events, maxevents, timeout); +} +#else // !OB_USE_WEPOLL — legacy WSAPoll-based emulation #include -#include // ---- epoll flags ---- #define EPOLLIN 0x001 @@ -307,6 +318,7 @@ extern "C" int ob_win32_epoll_wait_impl(int epfd, struct epoll_event *events, in static inline int epoll_wait(int epfd, struct epoll_event *events, int maxevents, int timeout) { return ob_win32_epoll_wait_impl(epfd, events, maxevents, timeout); } +#endif // OB_USE_WEPOLL // ---- eventfd emulation via loopback socket pair ---- #define EFD_NONBLOCK 1 @@ -1284,12 +1296,12 @@ class ObSqlNioImpl #endif } //use need_monopolize to prevent two observer processes use the same mysql port - int init(int port, bool need_monopolize, const char* unix_socket_path, bool disable_tcp) { + int init(int port, bool need_monopolize, const char* unix_socket_path = NULL) { int ret = OB_SUCCESS; if (port == -1) { ret = init_io(); } else { - ret = init_listen(port, need_monopolize, unix_socket_path, disable_tcp); + ret = init_listen(port, need_monopolize, unix_socket_path); } return ret; } @@ -1322,16 +1334,15 @@ class ObSqlNioImpl } return ret; } - int init_listen(int port, bool need_monopolize, const char* unix_socket_path, bool disable_tcp) { + int init_listen(int port, bool need_monopolize, const char* unix_socket_path) { int ret = OB_SUCCESS; uint32_t epflag = EPOLLIN; if ((epfd_ = epoll_create1(EPOLL_CLOEXEC)) < 0) { ret = OB_IO_ERROR; LOG_WARN("epoll_create fail", K(ret), K(errno)); } else { - if (disable_tcp) { - LOG_INFO("embedded mode: skipping tcp listen", K(port)); - } else if ((lfd_ = listen_create(!oceanbase::lib::use_ipv6() ? AF_INET : AF_INET6, port, need_monopolize)) < 0) { + // add tcp socket listen + if ((lfd_ = listen_create(!oceanbase::lib::use_ipv6() ? AF_INET : AF_INET6, port, need_monopolize)) < 0) { ret = OB_SERVER_LISTEN_ERROR; LOG_ERROR("listen create fail", K(ret), K(port), K(errno), KERRNOMSG(errno)); LOG_DBA_ERROR_V2(OB_SERVER_LISTEN_FAIL, ret, @@ -1348,21 +1359,7 @@ class ObSqlNioImpl if (OB_SUCC(ret) && unix_socket_path != NULL) { #ifdef _WIN32 // Windows: start Named Pipe server instead of AF_UNIX - char pipe_name[256] = {0}; - snprintf(pipe_name, sizeof(pipe_name), "%lu-%lld", - (unsigned long)GetCurrentProcessId(), - (long long)time(NULL)); - FILE *out_fp = fopen("run/sql.pipe", "w"); - if (NULL != out_fp) { - fputs(pipe_name, out_fp); - fclose(out_fp); - LOG_INFO("wrote run/sql.pipe", K(pipe_name)); - } else { - LOG_WARN("failed to write run/sql.pipe", K(errno)); - } - char pipe_full_path[300] = {0}; - snprintf(pipe_full_path, sizeof(pipe_full_path), "\\\\.\\pipe\\%s", pipe_name); - if (OB_FAIL(win_pipe_server_.init(pipe_full_path, this))) { + if (OB_FAIL(win_pipe_server_.init("\\\\.\\pipe\\MySQL", this))) { LOG_WARN("named pipe init fail", K(ret)); // Does not affect TCP, do not return error ret = OB_SUCCESS; @@ -1757,7 +1754,7 @@ class ObSqlNioImpl }; int ObSqlNio::start(int port, ObISqlSockHandler *handler, int n_thread, - const uint64_t tenant_id, bool disable_tcp) + const uint64_t tenant_id) { int ret = OB_SUCCESS; port_ = port; @@ -1777,7 +1774,7 @@ int ObSqlNio::start(int port, ObISqlSockHandler *handler, int n_thread, } bool need_monopolize = ((i == 0) ? true : false); new (impl_ + i) ObSqlNioImpl(*handler); - if (OB_FAIL(impl_[i].init(port, need_monopolize, unix_socket_path, disable_tcp))) { + if (OB_FAIL(impl_[i].init(port, need_monopolize, unix_socket_path))) { LOG_WARN("impl init fail", K(ret)); } } @@ -1943,7 +1940,7 @@ int ObSqlNio::set_thread_count(const int n_thread) if (n_thread > cur_thread) { for (int i = cur_thread; OB_SUCCESS == ret && i < n_thread; i++) { new (impl_ + i) ObSqlNioImpl(*handler_); - if (OB_FAIL(impl_[i].init(port_, need_monopolize, NULL, false))) { + if (OB_FAIL(impl_[i].init(port_, need_monopolize))) { LOG_WARN("impl init fail"); } } diff --git a/src/observer/CMakeLists.txt b/src/observer/CMakeLists.txt index dcd3b0af5..ba1499b6d 100644 --- a/src/observer/CMakeLists.txt +++ b/src/observer/CMakeLists.txt @@ -686,6 +686,9 @@ if(WIN32) ${CMAKE_BINARY_DIR}/deps/oblib/src/lib/compress/zstd_1_3_8/zstd_1_3_8_objs.lib ${CMAKE_BINARY_DIR}/deps/oblib/src/lib/compress/zstd/zstd_objs.lib ${CMAKE_BINARY_DIR}/deps/oblib/src/lib/compress/lz4/lz4-all.lib) + if(OB_USE_WEPOLL) + target_link_libraries(observer_without_bolt PRIVATE wepoll) + endif() # Itanium ABI _Unwind_* stubs (Windows uses SEH; PL unwinding non-functional) target_sources(observer_without_bolt PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/win32_unwind_stubs.c) endif()