fix raid size calculation when sizes of devices in array vary

It turns out mdadm computes the headroom based on the first device in the array, which means that the order of the devices matters!
2019-11-07 12:56:13 +13:00 · 2019-11-07 12:56:13 +13:00 · 233965b376
parent 6b5e7adcf6
commit 233965b376
3 changed files with 56 additions and 16 deletions
--- a/scripts/raid-size-tests.py
+++ b/scripts/raid-size-tests.py
@ -9,6 +9,7 @@

 import atexit
 import os
+import random
 import shutil
 import subprocess
 import sys
@ -18,6 +19,7 @@ import uuid
 import attr

 from subiquity.models.filesystem import (
+    align_down,
    dehumanize_size,
    get_raid_size,
    humanize_size,
@ -71,13 +73,14 @@ def create_devices_for_sizes(sizes):


 def create_raid(level, images):
-    name = '/dev/md/{}'.format(uuid.uuid4())
+    name = '/dev/md/test-{}'.format(uuid.uuid4())
    cmd = [
        'mdadm',
        '--verbose',
        '--create',
        '--metadata', 'default',
        '--level', level,
+        '--run',
        '-n', str(len(images)),
        '--assume-clean',
        name,
@ -114,6 +117,8 @@ def verify_size_ok(level, sizes):
            level, sz , calc_size, real_size), end=' ')
        if calc_size > real_size:
            print("BAAAAAAAAAAAD", real_size - calc_size)
+            print(raid)
+            input('waiting: ')
        else:
            print("OK by", real_size - calc_size)
            r = True
@ -132,6 +137,11 @@ try:
                if count >= level.min_devices:
                    if not verify_size_ok(level.value, [size]*count):
                        fails += 1
+                    if not verify_size_ok(level.value, [align_down(random.randrange(size, 10*size))]*count):
+                        fails += 1
+                    sizes = [align_down(random.randrange(size, 10*size)) for _ in range(count)]
+                    if not verify_size_ok(level.value, sizes):
+                        fails += 1
 finally:
    run(['umount', '-l', tmpdir])

--- a/subiquity/models/filesystem.py
+++ b/subiquity/models/filesystem.py
@ -218,36 +218,57 @@ def dehumanize_size(size):
    return num * mult // div


-def round_raid_size(min_size):
+DEFAULT_CHUNK = 512
+
+
+def calculate_data_offset(devsize):
+    devsize >>= 9  # convert to sectors
+
+    devsize = align_down(devsize, DEFAULT_CHUNK)
    # The calculation of how much of a device mdadm uses for raid is a
    # touch ridiculous. What follows is a translation of the code at:
    # https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/tree/super1.c?h=mdadm-4.1&id=20e8fe52e7190b3ffda127566852eac2eb7fa1f7#n2770
-    # (note that that calculation is in terms of 512-byte sectors and
-    # this one is in bytes).
+    # (note that the calculations are in terms of 512-byte sectors).
    #
    # This makes assumptions about the defaults mdadm uses but mostly
    # that the default metadata version is 1.2, and other formats use
    # less space.
-    bmspace = 128*1024
-    headroom = 128*1024*1024
-    while (headroom << 10) > min_size and headroom > 2*1024*1024:
+
+    # conversion of choose_bm_space:
+    if devsize < 64*2:
+        bmspace = 0
+    elif devsize - 64*2 >= 200*1024*1024*2:
+        bmspace = 128*2
+    elif devsize - 4*2 > 8*1024*1024*2:
+        bmspace = 64*2
+    else:
+        bmspace = 4*2
+
+    headroom = 128*1024*2
+    while (headroom << 10) > devsize and headroom / 2 >= DEFAULT_CHUNK*2*2:
        headroom >>= 1
-    # mdadm's Create() can round things a little more so, to be
-    # pessimistic, assume another megabyte gets wasted somewhere.
-    data_offset = align_up(12*1024 + bmspace + headroom) + 1024*1024
-    log.debug("get_raid_size: adjusting for %s bytes of overhead")
-    return min_size - data_offset
+
+    data_offset = 12*2 + bmspace + headroom
+    log.debug(
+        "get_raid_size: adjusting for %s sectors of overhead", data_offset)
+    data_offset = align_up(data_offset, 2*1024)
+
+    data_offset <<= 9  # convert back to bytes
+
+    return data_offset


 # This this is tested against reality in ./scripts/get-raid-sizes.py
 def get_raid_size(level, devices):
    if len(devices) == 0:
        return 0
-    min_size = round_raid_size(min(dev.size for dev in devices))
+    data_offset = calculate_data_offset(devices[0].size)
+    sizes = [align_down(dev.size - data_offset) for dev in devices]
+    min_size = min(sizes)
    if min_size <= 0:
        return 0
    if level == "raid0":
-        return min_size * len(devices)
+        return sum(sizes)
    elif level == "raid1":
        return min_size
    elif level == "raid5":
--- a/subiquity/models/tests/test_filesystem.py
+++ b/subiquity/models/tests/test_filesystem.py
@ -16,15 +16,17 @@
 from collections import namedtuple
 import unittest

+import attr
+
 from subiquity.models.filesystem import (
    Bootloader,
    dehumanize_size,
    DeviceAction,
    Disk,
    FilesystemModel,
+    get_raid_size,
    humanize_size,
    Partition,
-    round_raid_size,
    )


@ -109,7 +111,14 @@ class TestDehumanizeSize(unittest.TestCase):
 class TestRoundRaidSize(unittest.TestCase):

    def test_lp1816777(self):
-        self.assertLessEqual(round_raid_size(500107862016), 499972571136)
+
+        @attr.s
+        class FakeDev:
+            size = attr.ib()
+
+        self.assertLessEqual(
+            get_raid_size("raid1", [FakeDev(500107862016)]*2),
+            499972571136)


 FakeStorageInfo = namedtuple(