Skip to content

test_os_open in SocketEINTRTest hangs indefinitely on NetBSD #137397

@furkanonder

Description

@furkanonder

Bug report

Bug description:

The `SocketEINTRTest.test_os_open` test in `test_eintr` hangs indefinitely on NetBSD 10.0 (x86_64). This appears to be a NetBSD-specific issue with opening FIFOs under frequent signal interruption, similar to the issue described here.

Configuration

./configure --with-pydebug

Test Output

Warning -- files was modified by test_eintr
Warning --   Before: []
Warning --   After:  ['@test_16354_tmpæ']
test test_eintr failed -- Traceback (most recent call last):
  File "/home/blue/Desktop/cpython/Lib/test/test_eintr.py", line 17, in test_all
    script_helper.run_test_script(script)
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^
  File "/home/blue/Desktop/cpython/Lib/test/support/script_helper.py", line 324, in run_test_script
    assert_python_ok("-u", script, "-v")
    ~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^
  File "/home/blue/Desktop/cpython/Lib/test/support/script_helper.py", line 182, in assert_python_ok
    return _assert_python(True, *args, **env_vars)
  File "/home/blue/Desktop/cpython/Lib/test/support/script_helper.py", line 167, in _assert_python
    res.fail(cmd_line)
    ~~~~~~~~^^^^^^^^^^
  File "/home/blue/Desktop/cpython/Lib/test/support/script_helper.py", line 80, in fail
    raise AssertionError(f"Process return code is {exitcode}\n"
    ...<10 lines>...
                         f"---")
AssertionError: Process return code is 1
command line: ['/home/blue/Desktop/cpython/python', '-X', 'faulthandler', '-I', '-u', '/home/blue/Desktop/cpython/Lib/test/_test_eintr.py', '-v']

stdout:
---

---

stderr:
---
test_flock (__main__.FCNTLEINTRTest.test_flock) ... ok
test_lockf (__main__.FCNTLEINTRTest.test_lockf) ... ok
test_read (__main__.OSEINTRTest.test_read) ... ok
test_readinto (__main__.OSEINTRTest.test_readinto) ... ok
test_wait (__main__.OSEINTRTest.test_wait) ... ok
test_wait3 (__main__.OSEINTRTest.test_wait3) ... ok
test_wait4 (__main__.OSEINTRTest.test_wait4) ... ok
test_waitpid (__main__.OSEINTRTest.test_waitpid) ... ok
test_write (__main__.OSEINTRTest.test_write) ... ok
test_devpoll (__main__.SelectEINTRTest.test_devpoll) ... skipped 'need select.devpoll'
test_epoll (__main__.SelectEINTRTest.test_epoll) ... skipped 'need select.epoll'
test_kqueue (__main__.SelectEINTRTest.test_kqueue) ... ok
test_poll (__main__.SelectEINTRTest.test_poll) ... ok
test_select (__main__.SelectEINTRTest.test_select) ... ok
test_sigtimedwait (__main__.SignalEINTRTest.test_sigtimedwait) ... ok
test_sigwaitinfo (__main__.SignalEINTRTest.test_sigwaitinfo) ... ERROR
test_accept (__main__.SocketEINTRTest.test_accept) ... ok
test_open (__main__.SocketEINTRTest.test_open) ... ok
test_os_open (__main__.SocketEINTRTest.test_os_open) ... Timeout (0:10:00)!
Thread 0x00007c47bfed2800 (most recent call first):
  File "/home/blue/Desktop/cpython/Lib/test/_test_eintr.py", line 378 in os_open
  File "/home/blue/Desktop/cpython/Lib/test/_test_eintr.py", line 364 in _test_open
  File "/home/blue/Desktop/cpython/Lib/test/_test_eintr.py", line 384 in test_os_open
  File "/home/blue/Desktop/cpython/Lib/unittest/case.py", line 613 in _callTestMethod
  File "/home/blue/Desktop/cpython/Lib/unittest/case.py", line 667 in run
  File "/home/blue/Desktop/cpython/Lib/unittest/case.py", line 723 in __call__
  File "/home/blue/Desktop/cpython/Lib/unittest/suite.py", line 122 in run
  File "/home/blue/Desktop/cpython/Lib/unittest/suite.py", line 84 in __call__
  File "/home/blue/Desktop/cpython/Lib/unittest/suite.py", line 122 in run
  File "/home/blue/Desktop/cpython/Lib/unittest/suite.py", line 84 in __call__
  File "/home/blue/Desktop/cpython/Lib/unittest/runner.py", line 257 in run
  File "/home/blue/Desktop/cpython/Lib/unittest/main.py", line 270 in runTests
  File "/home/blue/Desktop/cpython/Lib/unittest/main.py", line 104 in __init__
  File "/home/blue/Desktop/cpython/Lib/test/_test_eintr.py", line 552 in <module>
---

Reproduction

I created a minimal C program that reproduces the same issue.

#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>

/* Incremented by handle_signal() every time the handler runs. */
volatile sig_atomic_t signal_count = 0;

/**
 * SIGALRM handler: records one more delivery in signal_count.
 *
 * The signal number is not used.
 */
void handle_signal(int sig) {
    (void)sig;
    signal_count = signal_count + 1;
}

/**
 * Open `path` with `flags`, trying again for as long as open() fails
 * with EINTR.
 *
 * A single "." is written to stdout for every interrupted attempt.
 * Any other failure is reported with perror() and terminates the process.
 *
 * Returns the (non-negative) file descriptor.
 */
int safe_open(const char *path, int flags) {
    for (;;) {
        int fd = open(path, flags);
        if (fd >= 0) {
            return fd;
        }
        if (errno != EINTR) {
            perror("open");
            exit(EXIT_FAILURE);
        }
        /* Interrupted by a signal: leave a progress mark and retry. */
        write(STDOUT_FILENO, ".", 1);
    }
}

/**
 * Close `fd`, trying again for as long as close() fails with EINTR.
 *
 * A single "C" is written to stdout for every interrupted attempt.
 * Any other failure is reported with perror() and the failing return
 * value of close() is handed back to the caller.
 *
 * NOTE(review): whether a descriptor is still open after close() reports
 * EINTR is platform-dependent; confirm that retrying is appropriate on
 * the target system.
 *
 * Returns the result of the last close() call.
 */
int safe_close(int fd) {
    for (;;) {
        int rc = close(fd);
        if (rc >= 0) {
            return rc;
        }
        if (errno != EINTR) {
            perror("close");
            return rc;
        }
        /* Interrupted by a signal: leave a progress mark and retry. */
        write(STDOUT_FILENO, "C", 1);
    }
}

/**
 * Sleep for at least `ms` milliseconds.
 *
 * nanosleep() returns early with EINTR whenever a signal handler runs and
 * stores the unslept time in its second argument. The call is repeated
 * with that remainder so that signals do not cut the delay short.
 *
 * Any other failure (for example EINVAL for a negative `ms`) ends the
 * wait immediately.
 */
void sleep_ms(long ms) {
    struct timespec remaining = {
        .tv_sec = ms / 1000,
        .tv_nsec = (ms % 1000) * 1000000L
    };

    /* The request and remainder may share one object, so `remaining` is
     * simply refreshed in place on every interrupted attempt. */
    while (nanosleep(&remaining, &remaining) == -1 && errno == EINTR) {
        continue;
    }
}

/**
 * Install handle_signal() as the SIGALRM handler and start a repeating
 * real-time interval timer (ITIMER_REAL) that fires every 10 milliseconds.
 *
 * sa_flags is left at 0, i.e. SA_RESTART is not requested, so system calls
 * interrupted by the handler are not restarted automatically.
 *
 * Exits the process with a diagnostic if sigaction() or setitimer() fails.
 */
void setup_timer(void) {
    struct sigaction sa = {0};
    sa.sa_handler = handle_signal;
    sigemptyset(&sa.sa_mask);  /* block no additional signals in the handler */
    if (sigaction(SIGALRM, &sa, NULL) < 0) {
        perror("sigaction");
        exit(EXIT_FAILURE);
    }

    struct itimerval timer = {
        .it_value = {0, 10000},      /* first expiry after 10 ms */
        .it_interval = {0, 10000}    /* then again every 10 ms */
    };

    if (setitimer(ITIMER_REAL, &timer, NULL) < 0) {
        perror("setitimer");
        exit(EXIT_FAILURE);
    }
}

/**
 * Reproducer driver.
 *
 * Starts the periodic SIGALRM timer, then runs 50 rounds. In each round a
 * fresh FIFO /tmp/test_fifo_<i> is created; a forked child waits 50 ms and
 * opens it for reading while the parent opens it for writing. Both ends
 * are closed again, the child is reaped and the FIFO is removed.
 *
 * Returns EXIT_SUCCESS after all rounds; exits with EXIT_FAILURE if
 * mkfifo() or fork() fails.
 */
int main(void) {
    printf("EINTR test - Ctrl+C to stop\n");
    setup_timer();

    for (int i = 1; i <= 50; ++i) {
        char fifo[64];
        snprintf(fifo, sizeof(fifo), "/tmp/test_fifo_%d", i);
        unlink(fifo);  /* best effort: drop a stale FIFO from an earlier run */
        if (mkfifo(fifo, 0666) < 0) {
            perror("mkfifo");
            exit(EXIT_FAILURE);
        }

        /* Empty the stdio buffer first so that pending output is not
         * duplicated into the child and printed twice when stdout is
         * redirected to a file or pipe. */
        fflush(stdout);

        pid_t pid = fork();
        if (pid < 0) {
            perror("fork");
            exit(EXIT_FAILURE);
        }
        if (pid == 0) {
            // Child opens FIFO for reading
            sleep_ms(50);  // 50ms delay to let parent open writer
            int fd = safe_open(fifo, O_RDONLY);
            safe_close(fd);
            exit(EXIT_SUCCESS);
        }
        else {
            // Parent opens FIFO for writing
            int fd = safe_open(fifo, O_WRONLY);
            safe_close(fd);

            /* The periodic SIGALRM can interrupt the wait as well; retry
             * on EINTR and wait for this round's child specifically so it
             * is always reaped before the next round starts. */
            while (waitpid(pid, NULL, 0) < 0) {
                if (errno != EINTR) {
                    perror("waitpid");
                    break;
                }
            }

            unlink(fifo);
            /* sig_atomic_t is not guaranteed to be int; convert for %d. */
            printf("Loop %d OK (signals: %d)\n", i, (int)signal_count);
            signal_count = 0;
        }

        sleep_ms(1);  // Small pause before next iteration
    }

    printf("Test complete.\n");
    return EXIT_SUCCESS;
}

Output on NetBSD (the program never finishes the first loop and has to be interrupted with Ctrl+C):

╭─blue@home ~
╰─$ gcc reproducer.c -o reproducer
╭─blue@home ~
╰─$ ./reproducer
EINTR test - Ctrl+C to stop
...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................^C

The expected output looks similar to this:

╰─$ ./reproducer
EINTR test - Ctrl+C to stop
.....Loop 1 OK (signals: 5)
.....Loop 2 OK (signals: 5)
.....Loop 3 OK (signals: 5)
.....Loop 4 OK (signals: 5)
.....Loop 5 OK (signals: 5)
.....Loop 6 OK (signals: 5)
.....Loop 7 OK (signals: 6)
.....Loop 8 OK (signals: 5)
.....Loop 9 OK (signals: 5)
.....Loop 10 OK (signals: 5)
.....Loop 11 OK (signals: 5)
.....Loop 12 OK (signals: 5)
.....Loop 13 OK (signals: 5)
.....Loop 14 OK (signals: 6)
.....Loop 15 OK (signals: 5)
.....Loop 16 OK (signals: 5)
.....Loop 17 OK (signals: 5)
.....Loop 18 OK (signals: 5)
.....Loop 19 OK (signals: 5)
.....Loop 20 OK (signals: 5)
.....Loop 21 OK (signals: 6)
.....Loop 22 OK (signals: 5)
.....Loop 23 OK (signals: 5)
.....Loop 24 OK (signals: 5)
.....Loop 25 OK (signals: 5)
.....Loop 26 OK (signals: 5)
.....Loop 27 OK (signals: 5)
.....Loop 28 OK (signals: 6)
.....Loop 29 OK (signals: 5)
.....Loop 30 OK (signals: 5)
.....Loop 31 OK (signals: 5)
.....Loop 32 OK (signals: 5)
.....Loop 33 OK (signals: 5)
.....Loop 34 OK (signals: 5)
.....Loop 35 OK (signals: 6)
.....Loop 36 OK (signals: 5)
.....Loop 37 OK (signals: 5)
.....Loop 38 OK (signals: 5)
.....Loop 39 OK (signals: 5)
.....Loop 40 OK (signals: 5)
.....Loop 41 OK (signals: 5)
.....Loop 42 OK (signals: 6)
.....Loop 43 OK (signals: 5)
.....Loop 44 OK (signals: 5)
.....Loop 45 OK (signals: 5)
.....Loop 46 OK (signals: 5)
.....Loop 47 OK (signals: 5)
.....Loop 48 OK (signals: 5)
.....Loop 49 OK (signals: 6)
.....Loop 50 OK (signals: 5)
Test complete.
╰─$ ktrace -f ktrace.out ./reproducer
EINTR test - Ctrl+C to stop
...............................................................................................................................^C
╰─$ kdump -f ktrace.out | tail -50
 17368  17368 reproducer CALL  setcontext(0x7f7fff444000)
 17368  17368 reproducer RET   setcontext JUSTRETURN
 17368  17368 reproducer CALL  write(1,0x401173,1)
 17368  17368 reproducer GIO   fd 1 wrote 1 bytes
       "."
 17368  17368 reproducer RET   write 1
 17368  17368 reproducer CALL  open(0x7f7fff4443d0,1,1)
 17368  17368 reproducer NAMI  "/tmp/test_fifo_1"
 17368  17368 reproducer RET   open -1 errno 4 Interrupted system call
 17368  17368 reproducer PSIG  SIGALRM caught handler=0x400dba mask=(): code=SI_TIMER sent by pid=0, uid=0 with sigval 0x0)
 17368  17368 reproducer CALL  setcontext(0x7f7fff444000)
 17368  17368 reproducer RET   setcontext JUSTRETURN
 17368  17368 reproducer CALL  write(1,0x401173,1)
 17368  17368 reproducer GIO   fd 1 wrote 1 bytes
       "."
 17368  17368 reproducer RET   write 1
 17368  17368 reproducer CALL  open(0x7f7fff4443d0,1,1)
 17368  17368 reproducer NAMI  "/tmp/test_fifo_1"
 17368  17368 reproducer RET   open -1 errno 4 Interrupted system call
 17368  17368 reproducer PSIG  SIGALRM caught handler=0x400dba mask=(): code=SI_TIMER sent by pid=0, uid=0 with sigval 0x0)
 17368  17368 reproducer CALL  setcontext(0x7f7fff444000)
 17368  17368 reproducer RET   setcontext JUSTRETURN
 17368  17368 reproducer CALL  write(1,0x401173,1)
 17368  17368 reproducer GIO   fd 1 wrote 1 bytes
       "."
 17368  17368 reproducer RET   write 1
 17368  17368 reproducer CALL  open(0x7f7fff4443d0,1,1)
 17368  17368 reproducer NAMI  "/tmp/test_fifo_1"
 17368  17368 reproducer RET   open -1 errno 4 Interrupted system call
 17368  17368 reproducer PSIG  SIGALRM caught handler=0x400dba mask=(): code=SI_TIMER sent by pid=0, uid=0 with sigval 0x0)
 17368  17368 reproducer CALL  setcontext(0x7f7fff444000)
 17368  17368 reproducer RET   setcontext JUSTRETURN
 17368  17368 reproducer CALL  write(1,0x401173,1)
 17368  17368 reproducer GIO   fd 1 wrote 1 bytes
       "."
 17368  17368 reproducer RET   write 1
 17368  17368 reproducer CALL  open(0x7f7fff4443d0,1,1)
 17368  17368 reproducer NAMI  "/tmp/test_fifo_1"
 17368  17368 reproducer RET   open -1 errno 4 Interrupted system call
 17368  17368 reproducer PSIG  SIGALRM caught handler=0x400dba mask=(): code=SI_TIMER sent by pid=0, uid=0 with sigval 0x0)
 17368  17368 reproducer CALL  setcontext(0x7f7fff444000)
 17368  17368 reproducer RET   setcontext JUSTRETURN
 17368  17368 reproducer CALL  write(1,0x401173,1)
 17368  17368 reproducer GIO   fd 1 wrote 1 bytes
       "."
 17368  17368 reproducer RET   write 1
 17368  17368 reproducer CALL  open(0x7f7fff4443d0,1,1)
 17368  17368 reproducer NAMI  "/tmp/test_fifo_1"
 17368  17368 reproducer RET   open RESTART
 17368  17368 reproducer PSIG  SIGINT SIG_DFL: code=SI_NOINFO
╭─blue@home ~

CPython versions tested on:

CPython main branch, 3.15, 3.14, 3.13

Operating systems tested on:

Other

Linked PRs

Metadata

Metadata

Assignees

No one assigned

    Labels

    3.13bugs and security fixes3.14bugs and security fixes3.15new features, bugs and security fixesOS-netbsdtestsTests in the Lib/test dirtype-bugAn unexpected behavior, bug, or error

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions