('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
Snuba service has been restarted recently. The error message is as follows:
load_entry_point('snuba', 'console_scripts', 'snuba')()
http.client.RemoteDisconnected: Remote end closed connection without response
response.begin()
File "/usr/local/lib/python3.7/http/client.py", line 306, in begin
File "/usr/local/lib/python3.7/http/client.py", line 275, in _read_status
return self.main(*args, **kwargs)
version, status, reason = self._read_status()
Traceback (most recent call last):
File "/usr/local/lib/python3.7/site-packages/click/core.py", line 722, in __call__
raise RemoteDisconnected("Remote end closed connection without"
During handling of the above exception, another exception occurred:
File "/usr/local/bin/snuba", line 11, in <module>
File "/usr/local/lib/python3.7/site-packages/click/core.py", line 697, in main
return _process_result(sub_ctx.command.invoke(sub_ctx))
return ctx.invoke(self.callback, **ctx.params)
return callback(*args, **kwargs)
consumer.run()
File "/usr/src/snuba/snuba/cli/consumer.py", line 156, in consumer
File "/usr/src/snuba/snuba/utils/streams/batching.py", line 137, in run
File "/usr/src/snuba/snuba/utils/streams/batching.py", line 142, in _run_once
File "/usr/local/lib/python3.7/site-packages/click/core.py", line 1066, in invoke
File "/usr/local/lib/python3.7/site-packages/click/core.py", line 895, in invoke
File "/usr/local/lib/python3.7/site-packages/click/core.py", line 535, in invoke
self._run_once()
rv = self.invoke(ctx)
raise six.reraise(type(error), error, _stacktrace)
File "/usr/local/lib/python3.7/site-packages/urllib3/connectionpool.py", line 603, in urlopen
self._flush()
File "/usr/local/lib/python3.7/site-packages/urllib3/connectionpool.py", line 641, in urlopen
File "/usr/src/snuba/snuba/consumer.py", line 98, in flush_batch
self.__writer.write(inserts)
chunked=True,
self.worker.flush_batch(self.__batch_results)
_stacktrace=sys.exc_info()[2])
File "/usr/src/snuba/snuba/utils/streams/batching.py", line 242, in _flush
File "/usr/src/snuba/snuba/clickhouse/http.py", line 73, in write
raise value.with_traceback(tb)
File "/usr/local/lib/python3.7/site-packages/urllib3/util/retry.py", line 368, in increment
File "/usr/local/lib/python3.7/site-packages/urllib3/packages/six.py", line 685, in reraise
File "/usr/local/lib/python3.7/site-packages/urllib3/connectionpool.py", line 383, in _make_request
File "/usr/local/lib/python3.7/http/client.py", line 1344, in getresponse
File "<string>", line 2, in raise_from
rv = real_getresponse(self, *args, **kwargs)
httplib_response = conn.getresponse()
File "/usr/local/lib/python3.7/http/client.py", line 306, in begin
File "/usr/local/lib/python3.7/http/client.py", line 275, in _read_status
six.raise_from(e, None)
version, status, reason = self._read_status()
chunked=chunked)
File "/usr/local/lib/python3.7/site-packages/urllib3/connectionpool.py", line 387, in _make_request
File "/usr/local/lib/python3.7/site-packages/sentry_sdk/integrations/stdlib.py", line 102, in getresponse
response.begin()
urllib3.exceptions.ProtocolError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
raise RemoteDisconnected("Remote end closed connection without"
Is this because there are too many connections, or because the network connection is broken?
Hi Folks,
We have run into a similar error in several services (snuba-consumer, snuba-outcomes-consumer); it occurs randomly, about once every several hours.
Sample Exception Stacktrace:
2021-09-04 21:23:04,560 Caught ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')), shutting down...
Traceback (most recent call last):
File "/usr/local/lib/python3.8/site-packages/urllib3/connectionpool.py", line 699, in urlopen
httplib_response = self._make_request(
File "/usr/local/lib/python3.8/site-packages/urllib3/connectionpool.py", line 445, in _make_request
six.raise_from(e, None)
File "<string>", line 3, in raise_from
File "/usr/local/lib/python3.8/site-packages/urllib3/connectionpool.py", line 440, in _make_request
httplib_response = conn.getresponse()
File "/usr/local/lib/python3.8/site-packages/sentry_sdk/integrations/stdlib.py", line 102, in getresponse
rv = real_getresponse(self, *args, **kwargs)
File "/usr/local/lib/python3.8/http/client.py", line 1348, in getresponse
response.begin()
File "/usr/local/lib/python3.8/http/client.py", line 316, in begin
version, status, reason = self._read_status()
File "/usr/local/lib/python3.8/http/client.py", line 285, in _read_status
raise RemoteDisconnected("Remote end closed connection without"
http.client.RemoteDisconnected: Remote end closed connection without response
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/bin/snuba", line 33, in <module>
sys.exit(load_entry_point('snuba', 'console_scripts', 'snuba')())
File "/usr/local/lib/python3.8/site-packages/click/core.py", line 829, in __call__
return self.main(*args, **kwargs)
File "/usr/local/lib/python3.8/site-packages/click/core.py", line 782, in main
rv = self.invoke(ctx)
File "/usr/local/lib/python3.8/site-packages/click/core.py", line 1259, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/usr/local/lib/python3.8/site-packages/click/core.py", line 1066, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/usr/local/lib/python3.8/site-packages/click/core.py", line 610, in invoke
return callback(*args, **kwargs)
File "/usr/src/snuba/snuba/cli/consumer.py", line 172, in consumer
consumer.run()
File "/usr/local/lib/python3.8/site-packages/arroyo/processing/processor.py", line 108, in run
self._run_once()
File "/usr/local/lib/python3.8/site-packages/arroyo/processing/processor.py", line 143, in _run_once
self.__processing_strategy.poll()
File "/usr/local/lib/python3.8/site-packages/arroyo/processing/strategies/streaming/transform.py", line 58, in poll
self.__next_step.poll()
File "/usr/local/lib/python3.8/site-packages/arroyo/processing/strategies/streaming/collect.py", line 119, in poll
self.__close_and_reset_batch()
File "/usr/local/lib/python3.8/site-packages/arroyo/processing/strategies/streaming/collect.py", line 101, in __close_and_reset_batch
self.__batch.close()
File "/usr/local/lib/python3.8/site-packages/arroyo/processing/strategies/streaming/collect.py", line 61, in close
self.__step.close()
File "/usr/src/snuba/snuba/consumers/consumer.py", line 222, in close
self.__insert_batch_writer.close()
File "/usr/src/snuba/snuba/consumers/consumer.py", line 87, in close
self.__writer.write(
File "/usr/src/snuba/snuba/clickhouse/http.py", line 245, in write
batch.join()
File "/usr/src/snuba/snuba/clickhouse/http.py", line 181, in join
response = self.__result.result(timeout)
File "/usr/local/lib/python3.8/concurrent/futures/_base.py", line 444, in result
return self.__get_result()
File "/usr/local/lib/python3.8/concurrent/futures/_base.py", line 389, in __get_result
raise self._exception
File "/usr/local/lib/python3.8/concurrent/futures/thread.py", line 57, in run
result = self.fn(*self.args, **self.kwargs)
File "/usr/local/lib/python3.8/site-packages/urllib3/connectionpool.py", line 755, in urlopen
retries = retries.increment(
File "/usr/local/lib/python3.8/site-packages/urllib3/util/retry.py", line 532, in increment
raise six.reraise(type(error), error, _stacktrace)
File "/usr/local/lib/python3.8/site-packages/urllib3/packages/six.py", line 769, in reraise
raise value.with_traceback(tb)
File "/usr/local/lib/python3.8/site-packages/urllib3/connectionpool.py", line 699, in urlopen
httplib_response = self._make_request(
File "/usr/local/lib/python3.8/site-packages/urllib3/connectionpool.py", line 445, in _make_request
six.raise_from(e, None)
File "<string>", line 3, in raise_from
File "/usr/local/lib/python3.8/site-packages/urllib3/connectionpool.py", line 440, in _make_request
httplib_response = conn.getresponse()
File "/usr/local/lib/python3.8/site-packages/sentry_sdk/integrations/stdlib.py", line 102, in getresponse
rv = real_getresponse(self, *args, **kwargs)
File "/usr/local/lib/python3.8/http/client.py", line 1348, in getresponse
response.begin()
File "/usr/local/lib/python3.8/http/client.py", line 316, in begin
version, status, reason = self._read_status()
File "/usr/local/lib/python3.8/http/client.py", line 285, in _read_status
raise RemoteDisconnected("Remote end closed connection without"
urllib3.exceptions.ProtocolError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
We're facing the same error. Here are the logs of the snuba-sessions-consumer container at the time of the error (plus one line before and one line after it):
2021-10-13 05:28:18,981 Completed processing <Batch: 1 message, open for 1.05 seconds>.
2021-10-13 05:28:21,983 Caught ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')), shutting down...
Traceback (most recent call last):
File "/usr/local/lib/python3.8/site-packages/urllib3/connectionpool.py", line 699, in urlopen
httplib_response = self._make_request(
File "/usr/local/lib/python3.8/site-packages/urllib3/connectionpool.py", line 445, in _make_request
six.raise_from(e, None)
File "<string>", line 3, in raise_from
File "/usr/local/lib/python3.8/site-packages/urllib3/connectionpool.py", line 440, in _make_request
httplib_response = conn.getresponse()
File "/usr/local/lib/python3.8/site-packages/sentry_sdk/integrations/stdlib.py", line 102, in getresponse
rv = real_getresponse(self, *args, **kwargs)
File "/usr/local/lib/python3.8/http/client.py", line 1348, in getresponse
response.begin()
File "/usr/local/lib/python3.8/http/client.py", line 316, in begin
version, status, reason = self._read_status()
File "/usr/local/lib/python3.8/http/client.py", line 285, in _read_status
raise RemoteDisconnected("Remote end closed connection without"
http.client.RemoteDisconnected: Remote end closed connection without response
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/bin/snuba", line 33, in <module>
sys.exit(load_entry_point('snuba', 'console_scripts', 'snuba')())
File "/usr/local/lib/python3.8/site-packages/click/core.py", line 829, in __call__
return self.main(*args, **kwargs)
File "/usr/local/lib/python3.8/site-packages/click/core.py", line 782, in main
rv = self.invoke(ctx)
File "/usr/local/lib/python3.8/site-packages/click/core.py", line 1259, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/usr/local/lib/python3.8/site-packages/click/core.py", line 1066, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/usr/local/lib/python3.8/site-packages/click/core.py", line 610, in invoke
return callback(*args, **kwargs)
File "/usr/src/snuba/snuba/cli/consumer.py", line 172, in consumer
consumer.run()
File "/usr/local/lib/python3.8/site-packages/arroyo/processing/processor.py", line 108, in run
self._run_once()
File "/usr/local/lib/python3.8/site-packages/arroyo/processing/processor.py", line 143, in _run_once
self.__processing_strategy.poll()
File "/usr/local/lib/python3.8/site-packages/arroyo/processing/strategies/streaming/transform.py", line 58, in poll
self.__next_step.poll()
File "/usr/local/lib/python3.8/site-packages/arroyo/processing/strategies/streaming/collect.py", line 119, in poll
self.__close_and_reset_batch()
File "/usr/local/lib/python3.8/site-packages/arroyo/processing/strategies/streaming/collect.py", line 101, in __close_and_reset_batch
self.__batch.close()
File "/usr/local/lib/python3.8/site-packages/arroyo/processing/strategies/streaming/collect.py", line 61, in close
self.__step.close()
File "/usr/src/snuba/snuba/consumers/consumer.py", line 222, in close
self.__insert_batch_writer.close()
File "/usr/src/snuba/snuba/consumers/consumer.py", line 87, in close
self.__writer.write(
File "/usr/src/snuba/snuba/clickhouse/http.py", line 245, in write
batch.join()
File "/usr/src/snuba/snuba/clickhouse/http.py", line 181, in join
response = self.__result.result(timeout)
File "/usr/local/lib/python3.8/concurrent/futures/_base.py", line 444, in result
return self.__get_result()
File "/usr/local/lib/python3.8/concurrent/futures/_base.py", line 389, in __get_result
raise self._exception
File "/usr/local/lib/python3.8/concurrent/futures/thread.py", line 57, in run
result = self.fn(*self.args, **self.kwargs)
File "/usr/local/lib/python3.8/site-packages/urllib3/connectionpool.py", line 755, in urlopen
retries = retries.increment(
File "/usr/local/lib/python3.8/site-packages/urllib3/util/retry.py", line 532, in increment
raise six.reraise(type(error), error, _stacktrace)
File "/usr/local/lib/python3.8/site-packages/urllib3/packages/six.py", line 769, in reraise
raise value.with_traceback(tb)
File "/usr/local/lib/python3.8/site-packages/urllib3/connectionpool.py", line 699, in urlopen
httplib_response = self._make_request(
File "/usr/local/lib/python3.8/site-packages/urllib3/connectionpool.py", line 445, in _make_request
six.raise_from(e, None)
File "<string>", line 3, in raise_from
File "/usr/local/lib/python3.8/site-packages/urllib3/connectionpool.py", line 440, in _make_request
httplib_response = conn.getresponse()
File "/usr/local/lib/python3.8/site-packages/sentry_sdk/integrations/stdlib.py", line 102, in getresponse
rv = real_getresponse(self, *args, **kwargs)
File "/usr/local/lib/python3.8/http/client.py", line 1348, in getresponse
response.begin()
File "/usr/local/lib/python3.8/http/client.py", line 316, in begin
version, status, reason = self._read_status()
File "/usr/local/lib/python3.8/http/client.py", line 285, in _read_status
raise RemoteDisconnected("Remote end closed connection without"
urllib3.exceptions.ProtocolError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
2021-10-13 05:28:32,239 New partitions assigned: {Partition(topic=Topic(name='ingest-sessions'), index=0): 9412}
2021-10-13 05:28:33,985 Completed processing <Batch: 7 messages, open for 1.74 seconds>.
I'm not seeing anything abnormal in the ClickHouse logs at that time, but here they are:
2021.10.13 05:28:04.807872 [ 14953 ] {-7724-456e-be46-2682e24a8526} <Information> executeQuery: Read 1 rows, 131.00 B in 0.033 sec., 30 rows/sec., 3.91 KiB/sec.
2021.10.13 05:28:04.810763 [ 14953 ] {} <Information> HTTPHandler: Done processing query
2021.10.13 05:28:13.281058 [ 12369 ] {-72cd-4a52-8b8a-770fa6809f9d} <Information> executeQuery: Read 1 rows, 131.00 B in 0.033 sec., 30 rows/sec., 3.90 KiB/sec.
2021.10.13 05:28:13.282042 [ 12369 ] {} <Information> HTTPHandler: Done processing query
2021.10.13 05:28:14.341500 [ 12369 ] {-35a9-4806-a76a-a0fbb61b16c4} <Information> executeQuery: Read 1 rows, 131.00 B in 0.047 sec., 21 rows/sec., 2.70 KiB/sec.
2021.10.13 05:28:14.345250 [ 12369 ] {} <Information> HTTPHandler: Done processing query
2021.10.13 05:28:15.604937 [ 114 ] {-5ad1-4682-ae0b-b6806b1cf720} <Information> executeQuery: Read 2 rows, 124.00 B in 0.017 sec., 115 rows/sec., 6.99 KiB/sec.
2021.10.13 05:28:15.606807 [ 114 ] {} <Information> HTTPHandler: Done processing query
2021.10.13 05:28:15.635220 [ 14953 ] {-8c6b-43aa-b1eb-0be470e4d7f3} <Information> executeQuery: Read 2 rows, 8.60 KiB in 0.050 sec., 39 rows/sec., 171.14 KiB/sec.
2021.10.13 05:28:15.638503 [ 14953 ] {} <Information> HTTPHandler: Done processing query
2021.10.13 05:28:15.743978 [ 12369 ] {-89c8-469e-9edb-1589ecb3783d} <Information> executeQuery: Read 1 rows, 131.00 B in 0.031 sec., 32 rows/sec., 4.16 KiB/sec.
2021.10.13 05:28:15.744861 [ 12369 ] {} <Information> HTTPHandler: Done processing query
2021.10.13 05:28:17.051431 [ 114 ] {-9a0c-4316-bbf1-24e41b4039bb} <Information> executeQuery: Read 1 rows, 71.00 B in 0.018 sec., 56 rows/sec., 3.91 KiB/sec.
2021.10.13 05:28:17.054076 [ 114 ] {} <Information> HTTPHandler: Done processing query
2021.10.13 05:28:17.097590 [ 14953 ] {-0420-4d75-8c69-ff7796b6ad2a} <Information> executeQuery: Read 1 rows, 4.46 KiB in 0.056 sec., 17 rows/sec., 79.50 KiB/sec.
2021.10.13 05:28:17.099700 [ 14953 ] {} <Information> HTTPHandler: Done processing query
2021.10.13 05:28:18.976578 [ 12361 ] {-319c-4eba-9450-78a2bb2ebb5d} <Information> executeQuery: Read 1 rows, 131.00 B in 0.035 sec., 28 rows/sec., 3.67 KiB/sec.
2021.10.13 05:28:18.978890 [ 12361 ] {} <Information> HTTPHandler: Done processing query
2021.10.13 05:28:20.242723 [ 12369 ] {-e4df-47c0-aee1-dbb269edd414} <Information> executeQuery: Read 1 rows, 71.00 B in 0.018 sec., 56 rows/sec., 3.95 KiB/sec.
2021.10.13 05:28:20.244585 [ 12369 ] {} <Information> HTTPHandler: Done processing query
2021.10.13 05:28:20.274800 [ 115 ] {-6635-4e53-a1af-a79c629201a9} <Information> executeQuery: Read 1 rows, 4.46 KiB in 0.052 sec., 19 rows/sec., 85.38 KiB/sec.
2021.10.13 05:28:20.275494 [ 115 ] {} <Information> HTTPHandler: Done processing query
2021.10.13 05:28:23.248064 [ 12369 ] {-085f-44c2-aeee-fc1c2ec17324} <Information> executeQuery: Read 1 rows, 71.00 B in 0.017 sec., 59 rows/sec., 4.13 KiB/sec.
2021.10.13 05:28:23.250179 [ 12369 ] {} <Information> HTTPHandler: Done processing query
2021.10.13 05:28:23.287516 [ 115 ] {-53a3-4aa3-ba77-e3a6d06799bc} <Information> executeQuery: Read 1 rows, 4.46 KiB in 0.051 sec., 19 rows/sec., 87.47 KiB/sec.
2021.10.13 05:28:23.288535 [ 115 ] {} <Information> HTTPHandler: Done processing query
2021.10.13 05:28:24.710746 [ 115 ] {-498f-4693-b92f-23af281b15c4} <Information> executeQuery: Read 2 rows, 8.60 KiB in 0.042 sec., 47 rows/sec., 203.13 KiB/sec.
2021.10.13 05:28:24.712428 [ 115 ] {} <Information> HTTPHandler: Done processing query
2021.10.13 05:28:25.761384 [ 12369 ] {-1e6c-4044-bd44-287296a7590a} <Information> executeQuery: Read 2 rows, 124.00 B in 0.019 sec., 104 rows/sec., 6.36 KiB/sec.
2021.10.13 05:28:25.764247 [ 12369 ] {} <Information> HTTPHandler: Done processing query
2021.10.13 05:28:28.578204 [ 12369 ] {-4df9-42bf-bb12-601ab481115f} <Information> executeQuery: Read 1 rows, 71.00 B in 0.017 sec., 58 rows/sec., 4.05 KiB/sec.
2021.10.13 05:28:28.579936 [ 12369 ] {} <Information> HTTPHandler: Done processing query
2021.10.13 05:28:28.615718 [ 12361 ] {-ff0c-44ba-9a6b-a74378825160} <Information> executeQuery: Read 1 rows, 4.46 KiB in 0.049 sec., 20 rows/sec., 90.78 KiB/sec.
2021.10.13 05:28:28.617867 [ 12361 ] {} <Information> HTTPHandler: Done processing query
2021.10.13 05:28:31.780541 [ 115 ] {-71ed-4675-b624-ac901bf94f29} <Information> executeQuery: Read 1 rows, 71.00 B in 0.017 sec., 59 rows/sec., 4.14 KiB/sec.
2021.10.13 05:28:31.788077 [ 115 ] {} <Information> HTTPHandler: Done processing query
2021.10.13 05:28:33.302492 [ 114 ] {-7f4d-44e2-a77f-fa052a26023b} <Information> executeQuery: Read 1 rows, 4.46 KiB in 0.052 sec., 19 rows/sec., 86.17 KiB/sec.
2021.10.13 05:28:33.304985 [ 114 ] {} <Information> HTTPHandler: Done processing query
2021.10.13 05:28:33.978146 [ 12369 ] {-b41a-4042-86fd-8a188adf4637} <Information> executeQuery: Read 7 rows, 617.00 B in 0.043 sec., 163 rows/sec., 14.08 KiB/sec.
2021.10.13 05:28:33.981026 [ 12369 ] {} <Information> HTTPHandler: Done processing query
2021.10.13 05:28:35.223748 [ 12361 ] {-c3a7-4765-8f40-f3821ae5185e} <Information> executeQuery: Read 1 rows, 71.00 B in 0.017 sec., 57 rows/sec., 4.00 KiB/sec.
2021.10.13 05:28:35.224960 [ 12361 ] {} <Information> HTTPHandler: Done processing query
2021.10.13 05:28:35.250042 [ 114 ] {-26c4-4851-a36b-17383f6f25c4} <Information> executeQuery: Read 1 rows, 4.46 KiB in 0.044 sec., 22 rows/sec., 101.75 KiB/sec.
2021.10.13 05:28:35.250921 [ 114 ] {} <Information> HTTPHandler: Done processing query
2021.10.13 05:28:53.774783 [ 115 ] {-9ef3-4e26-9223-527928f2f87a} <Information> executeQuery: Read 1 rows, 131.00 B in 0.034 sec., 29 rows/sec., 3.75 KiB/sec.
2021.10.13 05:28:53.777331 [ 115 ] {} <Information> HTTPHandler: Done processing query
2021.10.13 05:28:56.916070 [ 114 ] {-b2c4-488d-9129-b45ab889589c} <Information> executeQuery: Read 1 rows, 131.00 B in 0.033 sec., 30 rows/sec., 3.91 KiB/sec.
2021.10.13 05:28:56.918244 [ 114 ] {} <Information> HTTPHandler: Done processing query
2021.10.13 05:28:59.702851 [ 114 ] {-e4bc-4fc2-a03b-5672688cd634} <Information> executeQuery: Read 1 rows, 131.00 B in 0.033 sec., 30 rows/sec., 3.87 KiB/sec.
2021.10.13 05:28:59.705071 [ 114 ] {} <Information> HTTPHandler: Done processing query
P.S.: I didn't know what those IDs mean, so I removed the first part of each one; please tell me if they are necessary for troubleshooting.
Any news on this? We are facing the exact same issue as @aminvakil.
To add some context: we are using multiple workers with multiple Kafka partitions, and from time to time the snuba-sessions-consumer restarts with those same errors.
I'll leave some output of docker inspect here; maybe I'm doing something wrong with this container.
"Env": [
"DEFAULT_BROKERS=10.14.12.201:9092,10.14.12.69:9092,10.14.12.126:9092",
"SNUBA_SETTINGS=docker",
"SENTRY_RETENTION_DAYS=90",
"SNUBA_PARTITION_COUNTS=10",
"REDIS_HOST=10.14.12.239",
"CLICKHOUSE_HOST=10.14.12.210",
"PATH=/usr/local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
"LANG=C.UTF-8",
"GPG_KEY=E3FF2839C048B25C084DEBE9B26995E310250568",
"PYTHON_VERSION=3.8.12",
"PYTHON_PIP_VERSION=21.2.4",
"PYTHON_SETUPTOOLS_VERSION=57.5.0",
"PYTHON_GET_PIP_URL=https://github.com/pypa/get-pip/raw/38e54e5de07c66e875c11a1ebbdb938854625dd8/public/get-pip.py",
"PYTHON_GET_PIP_SHA256=e235c437e5c7d7524fbce3880ca39b917a73dc565e0c813465b7a7a329bb279a",
"GOSU_VERSION=1.12",
"GOSU_SHA256=0f25a21cf64e58078057adc78f38705163c1d564a959ff30a891c31917011a54",
"PIP_NO_CACHE_DIR=off",
"PIP_DISABLE_PIP_VERSION_CHECK=on",
"SNUBA_RELEASE=48c8bb1f3cf2274f9a9c0d791366815490f2614f",
"FLASK_DEBUG=0",
"PYTHONUNBUFFERED=1",
"PYTHONDONTWRITEBYTECODE=1",
"UWSGI_ENABLE_METRICS=true",
"UWSGI_NEED_PLUGIN=/var/lib/uwsgi/dogstatsd",
"UWSGI_STATS_PUSH=dogstatsd:127.0.0.1:8126",
"UWSGI_DOGSTATSD_EXTRA_TAGS=service:snuba"
"Cmd": [
"consumer",
"--storage=sessions_raw",
"--auto-offset-reset=latest",
"--max-batch-time-ms=750",
"--processes=2",
"--input-block-size=32000000",
"--output-block-size=32000000"
"Image": "getsentry/snuba:latest",
"Volumes": null,
"WorkingDir": "/usr/src/snuba",
"Entrypoint": [
"./docker_entrypoint.sh"
We had the same issue running Sentry in Kubernetes, using getsentry/sentry:22.7.0 and getsentry/snuba:22.7.0 docker images - our snuba session consumer would occasionally disconnect with the same error.
As per @BYK in #855 (comment), we looked further into the connectivity to ClickHouse and explored the keep-alive-timeout option: https://clickhouse.com/docs/en/operations/server-configuration-parameters/settings/#keep-alive-timeout
Increasing the keep-alive-timeout from 10 to 120 seconds fixed the issue for us. Our theory is that, due to the higher number of messages processed by the session consumer, it could end up sending data over a half-open connection to ClickHouse when the timeout is too low. For ClickHouse, we are using the Docker image yandex/clickhouse-server:20.3.9.70.