From 65050ef4dc6f81b36b84d92905bf1ad388882f1f Mon Sep 17 00:00:00 2001 From: binwiederhier Date: Sun, 8 Feb 2026 11:23:31 -0500 Subject: [PATCH] Fix server crash (nil pointer panic) when subscriber disconnects during publish --- docs/releases.md | 9 +-- server/server.go | 3 +- server/server_test.go | 128 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 135 insertions(+), 5 deletions(-) diff --git a/docs/releases.md b/docs/releases.md index 7863c15a..cd16054a 100644 --- a/docs/releases.md +++ b/docs/releases.md @@ -1685,20 +1685,21 @@ and the [ntfy Android app](https://github.com/binwiederhier/ntfy-android/release **Features:** +* Server: Support templating in the priority field ([#1426](https://github.com/binwiederhier/ntfy/issues/1426), thanks to [@seantomburke](https://github.com/seantomburke) for reporting) * Web: Show red notification dot on favicon when there are unread messages ([#1017](https://github.com/binwiederhier/ntfy/issues/1017), thanks to [@ad-si](https://github.com/ad-si) for reporting) -* Support templating in the priority field ([#1426](https://github.com/binwiederhier/ntfy/issues/1426), thanks to [@seantomburke](https://github.com/seantomburke) for reporting) **Bug fixes + maintenance:** +* Server: Fix crash when commit string is shorter than 7 characters in non-GitHub-Action builds ([#1493](https://github.com/binwiederhier/ntfy/issues/1493), thanks to [@cyrinux](https://github.com/cyrinux) for reporting) +* Server: Fix server crash (nil pointer panic) when subscriber disconnects during publish ([#1598](https://github.com/binwiederhier/ntfy/pull/1598)) +* Server: Fix log spam from `http: response.WriteHeader on hijacked connection` for WebSocket errors ([#1362](https://github.com/binwiederhier/ntfy/issues/1362), thanks to [@bonfiresh](https://github.com/bonfiresh) for reporting) +* Server: Use `slices.Contains` from stdlib to simplify code ([#1406](https://github.com/binwiederhier/ntfy/pull/1406), thanks to [@tanhuaan](https://github.com/tanhuaan)) * Web: Fix `clear=true` on action buttons not clearing the notification ([#1029](https://github.com/binwiederhier/ntfy/issues/1029), thanks to [@ElFishi](https://github.com/ElFishi) for reporting) -* Fix crash when commit string is shorter than 7 characters in non-GitHub-Action builds ([#1493](https://github.com/binwiederhier/ntfy/issues/1493), thanks to [@cyrinux](https://github.com/cyrinux) for reporting) -* Fix log spam from `http: response.WriteHeader on hijacked connection` for WebSocket errors ([#1362](https://github.com/binwiederhier/ntfy/issues/1362), thanks to [@bonfiresh](https://github.com/bonfiresh) for reporting) * Web: Fix Markdown message line height to match plain text (1.5 instead of 1.2) ([#1139](https://github.com/binwiederhier/ntfy/issues/1139), thanks to [@etfz](https://github.com/etfz) for reporting) * Web: Fix long lines (e.g. JSON) being truncated by adding horizontal scroll ([#1363](https://github.com/binwiederhier/ntfy/issues/1363), thanks to [@v3DJG6GL](https://github.com/v3DJG6GL) for reporting) * Web: Fix Windows notification icon being cut off ([#884](https://github.com/binwiederhier/ntfy/issues/884), thanks to [@ZhangTianrong](https://github.com/ZhangTianrong) for reporting) * Web: Use full URL in curl example on empty topic pages ([#1435](https://github.com/binwiederhier/ntfy/issues/1435), [#1535](https://github.com/binwiederhier/ntfy/pull/1535), thanks to [@elmatadoor](https://github.com/elmatadoor) for reporting and [@jjasghar](https://github.com/jjasghar) for the PR) * Web: Add validation feedback for service URL when adding user ([#1566](https://github.com/binwiederhier/ntfy/issues/1566), thanks to [@jermanuts](https://github.com/jermanuts)) -* Refactor: Use `slices.Contains` from stdlib to simplify code ([#1406](https://github.com/binwiederhier/ntfy/pull/1406), thanks to [@tanhuaan](https://github.com/tanhuaan)) * Docs: Remove obsolete `version` field from docker-compose examples ([#1333](https://github.com/binwiederhier/ntfy/issues/1333), thanks to [@seals187](https://github.com/seals187) for reporting and [@cyb3rko](https://github.com/cyb3rko) for fixing) * Docs: Fix Kustomize config in installation docs ([#1367](https://github.com/binwiederhier/ntfy/issues/1367), thanks to [@toby-griffiths](https://github.com/toby-griffiths)) * Docs: Use SVG F-Droid badge and add app store badges to README ([#1170](https://github.com/binwiederhier/ntfy/issues/1170), thanks to [@PanderMusubi](https://github.com/PanderMusubi) for reporting) \ No newline at end of file diff --git a/server/server.go b/server/server.go index 9069f3ed..820913e9 100644 --- a/server/server.go +++ b/server/server.go @@ -1463,7 +1463,8 @@ func (s *Server) handleSubscribeHTTP(w http.ResponseWriter, r *http.Request, v * // This blocks until any in-flight sub() call finishes writing/flushing the response writer, // then marks the connection as closed so future sub() calls are no-ops. This prevents a panic // from writing to a response writer that has been cleaned up after the handler returns. - // See https://github.com/binwiederhier/ntfy/issues/338#issuecomment-1163425889. + // See https://github.com/binwiederhier/ntfy/issues/338#issuecomment-1163425889 + // and https://github.com/binwiederhier/ntfy/pull/1598. wlock.Lock() closed = true wlock.Unlock() diff --git a/server/server_test.go b/server/server_test.go index a0eb14c4..a44d3880 100644 --- a/server/server_test.go +++ b/server/server_test.go @@ -3901,6 +3901,134 @@ func (m *mockResponseWriter) WriteHeader(statusCode int) { m.writeHeaderHit = true } +// closableResponseWriter simulates a real HTTP response writer that becomes invalid +// after the handler returns. In production, Go's HTTP server calls finishRequest() after +// the handler returns, which nils out the underlying bufio.Writer. Any subsequent Flush() +// from a straggler Publish goroutine causes a nil pointer panic. This mock tracks whether +// any Write or Flush occurred after the handler returned (i.e. after Close was called). +type closableResponseWriter struct { + header http.Header + mu sync.Mutex + closed bool + wroteAfterClose atomic.Bool +} + +func newClosableResponseWriter() *closableResponseWriter { + return &closableResponseWriter{ + header: make(http.Header), + } +} + +func (w *closableResponseWriter) Header() http.Header { + return w.header +} + +func (w *closableResponseWriter) Write(b []byte) (int, error) { + w.mu.Lock() + defer w.mu.Unlock() + if w.closed { + w.wroteAfterClose.Store(true) + return 0, errors.New("write after handler returned") + } + return len(b), nil +} + +func (w *closableResponseWriter) WriteHeader(statusCode int) {} + +func (w *closableResponseWriter) Flush() { + w.mu.Lock() + defer w.mu.Unlock() + if w.closed { + w.wroteAfterClose.Store(true) + } +} + +// Close simulates Go's HTTP server cleaning up the response writer after the handler returns. +func (w *closableResponseWriter) Close() { + w.mu.Lock() + defer w.mu.Unlock() + w.closed = true +} + +func TestServer_SubscribeHTTP_NoWriteAfterHandlerReturn(t *testing.T) { + // This test reproduces the panic from https://github.com/binwiederhier/ntfy/issues/338: + // + // panic: runtime error: invalid memory address or nil pointer dereference + // bufio.(*Writer).Flush(...) + // net/http.(*response).Flush(...) + // server.(*Server).handleSubscribeHTTP.func2(...) + // server.(*topic).Publish.func1.1(...) + // + // The race: topic.Publish() copies the subscriber list and calls each subscriber in its own + // goroutine. If the subscriber disconnects, the handler returns and Go's HTTP server cleans up + // the response writer. But a Publish goroutine that copied the subscriber list BEFORE + // Unsubscribe may still call sub() AFTER the handler returns. + // + // This test deterministically reproduces the scenario by: + // 1. Subscribing via handleSubscribeHTTP (which registers a sub closure on the topic) + // 2. Copying the subscriber function from the topic (simulating what topic.Publish does) + // 3. Cancelling the subscription and waiting for the handler to fully return + // 4. Calling the copied subscriber function AFTER the handler has returned + // 5. Checking that no write/flush occurred on the (now-invalid) response writer + // + // Without the wlock+closed fix, calling the subscriber after the handler returns writes to + // the closed response writer (which in production causes a nil pointer panic on Flush). + // With the fix, the subscriber sees closed=true and returns without writing. + t.Parallel() + s := newTestServer(t, newTestConfig(t)) + + rw := newClosableResponseWriter() + ctx, cancel := context.WithCancel(context.Background()) + req, err := http.NewRequestWithContext(ctx, "GET", "/mytopic/json", nil) + require.Nil(t, err) + req.RemoteAddr = "9.9.9.9:1234" + + // Start the subscribe handler (blocks until context is cancelled) + handlerDone := make(chan struct{}) + go func() { + s.handle(rw, req) + close(handlerDone) + }() + time.Sleep(100 * time.Millisecond) // Wait for subscription to be registered + + // Grab a copy of the subscriber function from the topic, exactly as topic.Publish() does + // via subscribersCopy(). This must happen BEFORE cancel/Unsubscribe removes the subscriber. + s.mu.RLock() + tp := s.topics["mytopic"] + s.mu.RUnlock() + require.NotNil(t, tp) + subscribersCopy := tp.subscribersCopy() + require.Equal(t, 1, len(subscribersCopy)) + + var copiedSub subscriber + for _, sub := range subscribersCopy { + copiedSub = sub.subscriber + } + + // Cancel the subscription and wait for the handler to fully return. + // At this point, the deferred cleanup in handleSubscribeHTTP runs: + // - With fix: wlock.Lock() waits for in-flight sub(), sets closed=true, wlock.Unlock() + // - Without fix: nothing prevents future sub() calls from writing + cancel() + <-handlerDone + + // Simulate Go's HTTP server cleaning up the response writer after the handler returns. + // In production, this is finishRequest() which nils out the bufio.Writer. + rw.Close() + + // Now call the copied subscriber function, simulating a straggler Publish goroutine + // that copied the subscriber list before Unsubscribe ran. In production, this is exactly + // how the panic occurs: the goroutine spawned by topic.Publish calls sub() after the + // handler has already returned and Go has cleaned up the response writer. + v := newVisitor(s.config, s.messageCache, s.userManager, netip.MustParseAddr("9.9.9.9"), nil) + msg := newDefaultMessage("mytopic", "straggler message") + _ = copiedSub(v, msg) + + require.False(t, rw.wroteAfterClose.Load(), + "sub() wrote to the response writer after the handler returned; "+ + "in production this causes a nil pointer panic in bufio.(*Writer).Flush()") +} + func TestServer_HandleError_SkipsWriteHeaderOnHijackedConnection(t *testing.T) { // Test that handleError does not call WriteHeader for WebSocket errors wrapped // with errWebSocketPostUpgrade (indicating the connection was hijacked)