storage: lengthen and log probing time

A reasonably common type of bug report is hitting the block probing
timeout.  That timeout is at 15 seconds today.  Extend that to 90
seconds, and log the time it takes.

As a performance exercise, it would be good to know where that time is
being spent and why it takes 10x longer or more for some people.
This commit is contained in:
Dan Bungert 2023-03-07 17:39:58 -07:00
parent 65e100eed9
commit 9d78aed696
1 changed files with 9 additions and 10 deletions

View File

@ -20,8 +20,8 @@ import json
import logging import logging
import os import os
import pathlib import pathlib
import platform
import select import select
import time
from typing import Dict, List, Optional from typing import Dict, List, Optional
from curtin.storage_config import ptable_uuid_to_flag_entry from curtin.storage_config import ptable_uuid_to_flag_entry
@ -823,22 +823,18 @@ class FilesystemController(SubiquityController, FilesystemManipulator):
@with_context() @with_context()
async def _probe(self, *, context=None): async def _probe(self, *, context=None):
self._errors = {} self._errors = {}
for (restricted, kind) in [ for (restricted, kind, short_label) in [
(False, ErrorReportKind.BLOCK_PROBE_FAIL), (False, ErrorReportKind.BLOCK_PROBE_FAIL, "block"),
(True, ErrorReportKind.DISK_PROBE_FAIL), (True, ErrorReportKind.DISK_PROBE_FAIL, "disk"),
]: ]:
try: try:
start = time.time()
await self._probe_once_task.start( await self._probe_once_task.start(
context=context, restricted=restricted) context=context, restricted=restricted)
# We wait on the task directly here, not # We wait on the task directly here, not
# self._probe_once_task.wait as if _probe_once_task # self._probe_once_task.wait as if _probe_once_task
# gets cancelled, we should be cancelled too. # gets cancelled, we should be cancelled too.
if platform.machine() == 'riscv64': await asyncio.wait_for(self._probe_once_task.task, 90.0)
# block probing is taking much longer on RISC-V - but why?
timeout = 60.0
else:
timeout = 15.0
await asyncio.wait_for(self._probe_once_task.task, timeout)
except asyncio.CancelledError: except asyncio.CancelledError:
# asyncio.CancelledError is a subclass of Exception in # asyncio.CancelledError is a subclass of Exception in
# Python 3.6 (sadface) # Python 3.6 (sadface)
@ -850,6 +846,9 @@ class FilesystemController(SubiquityController, FilesystemManipulator):
if report is not None: if report is not None:
self._errors[restricted] = (exc, report) self._errors[restricted] = (exc, report)
continue continue
finally:
elapsed = time.time() - start
log.debug(f'{short_label} probing took {elapsed:.1f} seconds')
break break
def run_autoinstall_guided(self, layout): def run_autoinstall_guided(self, layout):