|
45 | 45 | "from jupyter_client.kernelspec import KernelSpec\n", |
46 | 46 | "from jupyter_client import AsyncKernelManager\n", |
47 | 47 | "from traitlets import Type\n", |
48 | | - "import asyncio, zmq.asyncio, time" |
| 48 | + "import asyncio, zmq.asyncio, time, logging" |
49 | 49 | ] |
50 | 50 | }, |
51 | 51 | { |
|
59 | 59 | "from fastcore.utils import patch" |
60 | 60 | ] |
61 | 61 | }, |
| 62 | + { |
| 63 | + "cell_type": "code", |
| 64 | + "execution_count": null, |
| 65 | + "id": "737a0fc1", |
| 66 | + "metadata": {}, |
| 67 | + "outputs": [], |
| 68 | + "source": [ |
| 69 | + "#| export\n", |
| 70 | + "_log = logging.getLogger(__name__)" |
| 71 | + ] |
| 72 | + }, |
62 | 73 | { |
63 | 74 | "cell_type": "markdown", |
64 | 75 | "id": "665c28bb", |
|
101 | 112 | "source": [ |
102 | 113 | "#| export\n", |
103 | 114 | "class ConKernelClient(AsyncKernelClient):\n", |
| 115 | + " def _fail_pending(self, exc:Exception, skip=None):\n", |
| 116 | + " for k,(q,_) in list(getattr(self, '_pending', {}).items()):\n", |
| 117 | + " if k != skip:\n", |
| 118 | + " try: q.put_nowait(exc)\n", |
| 119 | + " except asyncio.QueueFull: pass\n", |
| 120 | + "\n", |
| 121 | + " def _check_alive(self):\n", |
| 122 | + " if not self.channels_running: raise RuntimeError(\"Channels not running\")\n", |
| 123 | + " tk = getattr(self, '_shell_reader_task', None)\n", |
| 124 | + " return tk is not None and not tk.done()\n", |
| 125 | + "\n", |
104 | 126 | " async def start_channels(self, shell:bool=True, iopub:bool=True, stdin:bool=True, hb:bool=True, control:bool=True):\n", |
105 | 127 | " \"Start channels, wait for ready, and launch background shell-reply reader\"\n", |
106 | 128 | " super().start_channels(shell=shell, iopub=iopub, stdin=stdin, hb=hb, control=control)\n", |
|
112 | 134 | " while True:\n", |
113 | 135 | " try: reply = await self.get_shell_msg(timeout=None)\n", |
114 | 136 | " except Exception as e:\n", |
115 | | - " for q in self._pending.values(): await q.put(e)\n", |
116 | | - " if self._pending: logging.warning(f\"_reader died with pending - {self._pending}: {e}\")\n", |
117 | | - " else: logging.warning(f\"_reader died with no pending: {e}\")\n", |
| 137 | + " self._fail_pending(e)\n", |
| 138 | + " _log.warning(f\"_reader died, pending={list(self._pending)}: {e}\")\n", |
118 | 139 | " break\n", |
119 | | - " q = self._pending.get(reply[\"parent_header\"].get(\"msg_id\"))\n", |
120 | | - " if q: await q.put(reply)\n", |
| 140 | + " mid = reply[\"parent_header\"].get(\"msg_id\")\n", |
| 141 | + " pend = self._pending.get(mid)\n", |
| 142 | + " if pend:\n", |
| 143 | + " q, soe = pend\n", |
| 144 | + " try: q.put_nowait(reply)\n", |
| 145 | + " except asyncio.QueueFull: pass\n", |
| 146 | + " else: _log.warning(f\"Orphan reply for {reply['parent_header'].get('msg_id')}, pending={list(self._pending)}\")\n", |
| 147 | + " cts = reply.get(\"content\", {})\n", |
| 148 | + " if cts.get(\"status\") in (\"error\", \"aborted\") and pend and soe:\n", |
| 149 | + " exc = RuntimeError(f\"Kernel error aborted: {cts.get('ename')}: {cts.get('evalue')}\")\n", |
| 150 | + " self._fail_pending(exc, skip=mid)\n", |
121 | 151 | " self._shell_reader_task = asyncio.create_task(_reader())\n", |
122 | 152 | " await _ready.wait()\n", |
123 | 153 | " await asyncio.sleep(0.2)\n", |
124 | 154 | " return self\n", |
125 | 155 | "\n", |
126 | 156 | " def stop_channels(self):\n", |
127 | 157 | " \"Stop channels and cancel the background shell-reply reader task\"\n", |
| 158 | + " self._fail_pending(RuntimeError(\"Shell channels stopped before reply\"))\n", |
128 | 159 | " super().stop_channels()\n", |
129 | 160 | " if (tk := getattr(self, '_shell_reader_task', None)):\n", |
130 | 161 | " tk.cancel()\n", |
|
133 | 164 | "\n", |
134 | 165 | " async def _async_recv_reply(self, msg_id, timeout=None, channel=\"shell\"):\n", |
135 | 166 | " if channel == \"control\": return await self._async_get_control_msg(timeout=timeout)\n", |
136 | | - " q = self._pending[msg_id]\n", |
| 167 | + " q, _ = self._pending[msg_id]\n", |
137 | 168 | " try:\n", |
138 | 169 | " res = await asyncio.wait_for(q.get(), timeout)\n", |
139 | 170 | " if isinstance(res, Exception): raise res\n", |
140 | 171 | " return res\n", |
141 | | - " except asyncio.TimeoutError as e: raise TimeoutError(\"Timeout waiting for reply\") from e\n", |
| 172 | + " except (asyncio.TimeoutError, asyncio.CancelledError) as e:\n", |
| 173 | + " _log.warning(f\"Timeout for {msg_id}, pending={list(self._pending)}\")\n", |
| 174 | + " raise TimeoutError(\"Timeout waiting for reply\") from e\n", |
142 | 175 | " finally: self._pending.pop(msg_id, None)\n", |
143 | 176 | "\n", |
144 | 177 | " def execute(self, code, user_expressions=None, allow_stdin=None, reply=False, subsh_id=None,\n", |
145 | | - " cts_typ='code', timeout=60, msg_id=None, **kw):\n", |
| 178 | + " cts_typ='code', timeout=60, msg_id=None, stop_on_error=True, **kw):\n", |
146 | 179 | " \"Send an execute request, returning a coroutine for the reply if `reply`, else the msg_id\"\n", |
| 180 | + " if not self._check_alive(): return asyncio.sleep(0) if reply else None\n", |
147 | 181 | " if user_expressions is None: user_expressions = {}\n", |
148 | 182 | " if allow_stdin is None: allow_stdin = self.allow_stdin\n", |
149 | | - " content = dict(user_expressions=user_expressions, allow_stdin=allow_stdin, subsh_id=subsh_id, **kw)\n", |
| 183 | + " content = dict(user_expressions=user_expressions, allow_stdin=allow_stdin, subsh_id=subsh_id, stop_on_error=stop_on_error, **kw)\n", |
150 | 184 | " content[cts_typ] = code\n", |
151 | 185 | " msg = self.session.msg(\"execute_request\", content)\n", |
152 | 186 | " if msg_id is not None: msg[\"header\"][\"msg_id\"] = msg_id\n", |
153 | 187 | " if subsh_id is not None: msg[\"header\"][\"subshell_id\"] = subsh_id\n", |
154 | 188 | " msg_id = msg[\"header\"][\"msg_id\"]\n", |
155 | | - " if reply: self._pending[msg_id] = asyncio.Queue(maxsize=1)\n", |
| 189 | + " if reply: self._pending[msg_id] = (asyncio.Queue(maxsize=1), stop_on_error)\n", |
156 | 190 | " self.shell_channel.send(msg)\n", |
157 | 191 | " if not reply: return msg_id\n", |
158 | 192 | " return self._async_recv_reply(msg_id, timeout=timeout)\n", |
|
212 | 246 | "execution_count": null, |
213 | 247 | "metadata": {}, |
214 | 248 | "output_type": "execute_result" |
| 249 | + }, |
| 250 | + { |
| 251 | + "name": "stderr", |
| 252 | + "output_type": "stream", |
| 253 | + "text": [ |
| 254 | + "Orphan reply for 30c03d32-82cd57ee2bae596f194f9300_34664_1, pending=[]\n" |
| 255 | + ] |
215 | 256 | } |
216 | 257 | ], |
217 | 258 | "source": [ |
|
228 | 269 | { |
229 | 270 | "data": { |
230 | 271 | "text/plain": [ |
231 | | - "'95164565-1b052c74632b03fbe217b8de_3713_1'" |
| 272 | + "'30c03d32-82cd57ee2bae596f194f9300_34664_1'" |
232 | 273 | ] |
233 | 274 | }, |
234 | 275 | "execution_count": null, |
|
293 | 334 | { |
294 | 335 | "data": { |
295 | 336 | "text/plain": [ |
296 | | - "{'msg_id': '95164565-1b052c74632b03fbe217b8de_3713_1',\n", |
| 337 | + "{'msg_id': '30c03d32-82cd57ee2bae596f194f9300_34664_1',\n", |
297 | 338 | " 'msg_type': 'execute_request',\n", |
298 | 339 | " 'username': 'jhoward',\n", |
299 | | - " 'session': '95164565-1b052c74632b03fbe217b8de',\n", |
300 | | - " 'date': datetime.datetime(2026, 2, 27, 3, 16, 39, 228656, tzinfo=tzutc()),\n", |
| 340 | + " 'session': '30c03d32-82cd57ee2bae596f194f9300',\n", |
| 341 | + " 'date': datetime.datetime(2026, 3, 5, 6, 20, 32, 255676, tzinfo=tzutc()),\n", |
301 | 342 | " 'version': '5.4'}" |
302 | 343 | ] |
303 | 344 | }, |
|
325 | 366 | "execution_count": null, |
326 | 367 | "id": "afb4f539", |
327 | 368 | "metadata": {}, |
328 | | - "outputs": [], |
| 369 | + "outputs": [ |
| 370 | + { |
| 371 | + "name": "stderr", |
| 372 | + "output_type": "stream", |
| 373 | + "text": [ |
| 374 | + "Orphan reply for 30c03d32-82cd57ee2bae596f194f9300_34664_2, pending=[]\n" |
| 375 | + ] |
| 376 | + } |
| 377 | + ], |
329 | 378 | "source": [ |
330 | 379 | "kc = await km.client().start_channels()" |
331 | 380 | ] |
|
339 | 388 | { |
340 | 389 | "data": { |
341 | 390 | "text/plain": [ |
342 | | - "{'header': {'msg_id': 'd40943ee-1c9991f3726c7b2c58e4e42c_3719_21',\n", |
| 391 | + "{'header': {'msg_id': '3560f722-f0607ae043c870acc8743b8c_34684_21',\n", |
343 | 392 | " 'msg_type': 'execute_reply',\n", |
344 | 393 | " 'username': 'jhoward',\n", |
345 | | - " 'session': 'd40943ee-1c9991f3726c7b2c58e4e42c',\n", |
346 | | - " 'date': datetime.datetime(2026, 2, 27, 3, 16, 40, 156085, tzinfo=tzutc()),\n", |
| 394 | + " 'session': '3560f722-f0607ae043c870acc8743b8c',\n", |
| 395 | + " 'date': datetime.datetime(2026, 3, 5, 6, 20, 33, 234758, tzinfo=tzutc()),\n", |
347 | 396 | " 'version': '5.4'},\n", |
348 | | - " 'msg_id': 'd40943ee-1c9991f3726c7b2c58e4e42c_3719_21',\n", |
| 397 | + " 'msg_id': '3560f722-f0607ae043c870acc8743b8c_34684_21',\n", |
349 | 398 | " 'msg_type': 'execute_reply',\n", |
350 | | - " 'parent_header': {'msg_id': '95164565-1b052c74632b03fbe217b8de_3713_1',\n", |
| 399 | + " 'parent_header': {'msg_id': '30c03d32-82cd57ee2bae596f194f9300_34664_1',\n", |
351 | 400 | " 'msg_type': 'execute_request',\n", |
352 | 401 | " 'username': 'jhoward',\n", |
353 | | - " 'session': '95164565-1b052c74632b03fbe217b8de',\n", |
354 | | - " 'date': datetime.datetime(2026, 2, 27, 3, 16, 40, 151952, tzinfo=tzutc()),\n", |
| 402 | + " 'session': '30c03d32-82cd57ee2bae596f194f9300',\n", |
| 403 | + " 'date': datetime.datetime(2026, 3, 5, 6, 20, 33, 227523, tzinfo=tzutc()),\n", |
355 | 404 | " 'version': '5.4'},\n", |
356 | | - " 'metadata': {'started': '2026-02-27T03:16:40.153095Z',\n", |
| 405 | + " 'metadata': {'started': '2026-03-05T06:20:33.230196Z',\n", |
357 | 406 | " 'dependencies_met': True,\n", |
358 | | - " 'engine': '45b26be5-508d-4fa0-8397-a8e45704f6da',\n", |
| 407 | + " 'engine': 'fce6c486-de7f-4012-87a9-ef36d7d2ec76',\n", |
359 | 408 | " 'status': 'ok'},\n", |
360 | 409 | " 'content': {'status': 'ok',\n", |
361 | 410 | " 'execution_count': 2,\n", |
|
397 | 446 | "execution_count": null, |
398 | 447 | "id": "0ccc7cca", |
399 | 448 | "metadata": {}, |
400 | | - "outputs": [], |
401 | | - "source": [ |
402 | | - "a = kc.execute('x=2', reply=True)\n", |
403 | | - "b = kc.execute('y=3', reply=True)" |
404 | | - ] |
405 | | - }, |
406 | | - { |
407 | | - "cell_type": "code", |
408 | | - "execution_count": null, |
409 | | - "id": "2739f44c", |
410 | | - "metadata": {}, |
411 | 449 | "outputs": [ |
412 | 450 | { |
413 | 451 | "data": { |
414 | 452 | "text/plain": [ |
415 | | - "'95164565-1b052c74632b03fbe217b8de_3713_5'" |
| 453 | + "'30c03d32-82cd57ee2bae596f194f9300_34664_5'" |
416 | 454 | ] |
417 | 455 | }, |
418 | 456 | "execution_count": null, |
|
421 | 459 | } |
422 | 460 | ], |
423 | 461 | "source": [ |
| 462 | + "a = kc.execute('x=2', reply=True)\n", |
| 463 | + "b = kc.execute('y=3', reply=True)\n", |
| 464 | + "\n", |
424 | 465 | "r = await asyncio.wait_for(asyncio.gather(a,b), timeout=2)\n", |
425 | 466 | "test_eq(len(r), 2)\n", |
426 | 467 | "r[0]['parent_header']['msg_id']" |
427 | 468 | ] |
428 | 469 | }, |
| 470 | + { |
| 471 | + "cell_type": "code", |
| 472 | + "execution_count": null, |
| 473 | + "id": "27a2a287", |
| 474 | + "metadata": {}, |
| 475 | + "outputs": [], |
| 476 | + "source": [ |
| 477 | + "async def g():\n", |
| 478 | + " for i in range(10): await kc.execute(f'a{i}={i}; a{i}', reply=True)\n", |
| 479 | + "\n", |
| 480 | + "r = await asyncio.wait_for(asyncio.gather(g(),g(),g(),g()), timeout=10)" |
| 481 | + ] |
| 482 | + }, |
429 | 483 | { |
430 | 484 | "cell_type": "code", |
431 | 485 | "execution_count": null, |
|
0 commit comments