From ea9af1cbc01bcb46647c3eb18456b604cadaccd3 Mon Sep 17 00:00:00 2001 From: Michael Hudson-Doyle Date: Wed, 6 Nov 2019 15:35:04 +1300 Subject: [PATCH 01/11] More accurate estimation of the size of a RAID I found the code in mdadm that decides how much of a device is usable for data and copied it. It's a bit hairy! For https://bugs.launchpad.net/subiquity/+bug/1816777 and many similar bugs. --- subiquity/models/filesystem.py | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/subiquity/models/filesystem.py b/subiquity/models/filesystem.py index 3c4c1505..55739c91 100644 --- a/subiquity/models/filesystem.py +++ b/subiquity/models/filesystem.py @@ -218,14 +218,28 @@ def dehumanize_size(size): return num * mult // div -# This is a guess! -RAID_OVERHEAD = 8 * (1 << 20) - - def get_raid_size(level, devices): if len(devices) == 0: return 0 - min_size = min(dev.size for dev in devices) - RAID_OVERHEAD + # The calculation of how much of a device mdadm uses for raid is a + # touch ridiculous. What follows is a translation of the code at: + # https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/tree/super1.c?h=mdadm-4.1&id=20e8fe52e7190b3ffda127566852eac2eb7fa1f7#n2770 + # (note that that calculation is in terms of 512-byte sectors and + # this one is in bytes). + # + # This makes assumptions about the defaults mdadm uses but mostly + # that the default metadata version is 1.2, and other formats use + # less space. + min_size = min(dev.size for dev in devices) + bmspace = 128*1024 + headroom = 128*1024*1024 + while (headroom << 10) > min_size and headroom > 2*1024*1024: + headroom >>= 1 + # mdadm's Create() can round things a little more so, to be + # pessimistic, assume another megabyte gets wasted somewhere. + data_offset = align_up(12*1024 + bmspace + headroom) + 1024*1024 + log.debug("get_raid_size: adjusting for %s bytes of overhead") + min_size -= data_offset if min_size <= 0: return 0 if level == "raid0": @@ -233,9 +247,9 @@ def get_raid_size(level, devices): elif level == "raid1": return min_size elif level == "raid5": - return (min_size - RAID_OVERHEAD) * (len(devices) - 1) + return min_size * (len(devices) - 1) elif level == "raid6": - return (min_size - RAID_OVERHEAD) * (len(devices) - 2) + return min_size * (len(devices) - 2) elif level == "raid10": return min_size * (len(devices) // 2) else: @@ -1246,6 +1260,10 @@ class FilesystemModel(object): emitted_ids = set() def emit(obj): + if isinstance(obj, Raid): + log.debug( + "FilesystemModel: estimated size of %s %s is %s", + obj.raidlevel, obj.name, obj.size) r.append(asdict(obj)) emitted_ids.add(obj.id) From 53e532e602ee90f1153ad205abd1e2b24ec9df83 Mon Sep 17 00:00:00 2001 From: Michael Hudson-Doyle Date: Wed, 6 Nov 2019 15:57:35 +1300 Subject: [PATCH 02/11] add a test from the only bug i can find where the size of the resulting raid is reported --- subiquity/models/filesystem.py | 13 ++++++++----- subiquity/models/tests/test_filesystem.py | 7 +++++++ 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/subiquity/models/filesystem.py b/subiquity/models/filesystem.py index 55739c91..f47319ff 100644 --- a/subiquity/models/filesystem.py +++ b/subiquity/models/filesystem.py @@ -218,9 +218,7 @@ def dehumanize_size(size): return num * mult // div -def get_raid_size(level, devices): - if len(devices) == 0: - return 0 +def round_raid_size(min_size): # The calculation of how much of a device mdadm uses for raid is a # touch ridiculous. 
What follows is a translation of the code at: # https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/tree/super1.c?h=mdadm-4.1&id=20e8fe52e7190b3ffda127566852eac2eb7fa1f7#n2770 @@ -230,7 +228,6 @@ def get_raid_size(level, devices): # This makes assumptions about the defaults mdadm uses but mostly # that the default metadata version is 1.2, and other formats use # less space. - min_size = min(dev.size for dev in devices) bmspace = 128*1024 headroom = 128*1024*1024 while (headroom << 10) > min_size and headroom > 2*1024*1024: @@ -239,7 +236,13 @@ def get_raid_size(level, devices): # pessimistic, assume another megabyte gets wasted somewhere. data_offset = align_up(12*1024 + bmspace + headroom) + 1024*1024 log.debug("get_raid_size: adjusting for %s bytes of overhead") - min_size -= data_offset + return min_size - data_offset + + +def get_raid_size(level, devices): + min_size = round_raid_size(min(dev.size for dev in devices)) + if len(devices) == 0: + return 0 if min_size <= 0: return 0 if level == "raid0": diff --git a/subiquity/models/tests/test_filesystem.py b/subiquity/models/tests/test_filesystem.py index e71cd0ef..9d8b076c 100644 --- a/subiquity/models/tests/test_filesystem.py +++ b/subiquity/models/tests/test_filesystem.py @@ -24,6 +24,7 @@ from subiquity.models.filesystem import ( FilesystemModel, humanize_size, Partition, + round_raid_size, ) @@ -105,6 +106,12 @@ class TestDehumanizeSize(unittest.TestCase): self.assertEqual(expected_error, actual_error) +class TestRoundRaidSize(unittest.TestCase): + + def test_lp1816777(self): + self.assertLessEqual(round_raid_size(500107862016), 499972571136) + + FakeStorageInfo = namedtuple( 'FakeStorageInfo', ['name', 'size', 'free', 'serial', 'model']) FakeStorageInfo.__new__.__defaults__ = (None,) * len(FakeStorageInfo._fields) From 679b6110776bec7ae43dbd7de3b158706c9ed57c Mon Sep 17 00:00:00 2001 From: Michael Hudson-Doyle Date: Thu, 7 Nov 2019 10:32:09 +1300 Subject: [PATCH 03/11] add tests of get_raid_size against reality --- scripts/raid-size-tests.py | 127 +++++++++++++++++++++++++++++++++ subiquity/models/filesystem.py | 3 +- 2 files changed, 129 insertions(+), 1 deletion(-) create mode 100644 scripts/raid-size-tests.py diff --git a/scripts/raid-size-tests.py b/scripts/raid-size-tests.py new file mode 100644 index 00000000..efe50ea4 --- /dev/null +++ b/scripts/raid-size-tests.py @@ -0,0 +1,127 @@ +#!/usr/bin/python3 + +import atexit +import os +import shutil +import subprocess +import sys +import tempfile +import uuid + +import attr + +from subiquity.models.filesystem import ( + dehumanize_size, + get_raid_size, + humanize_size, + raidlevels, + ) + + +tmpdir = tempfile.mkdtemp() + +raids = [] +loopdevs = [] + +_cmdoutput = [] + +def run(cmd): + try: + subprocess.run( + cmd, check=True, + stdout=subprocess.PIPE, stdin=subprocess.DEVNULL, + stderr=subprocess.STDOUT) + except subprocess.CalledProcessError as e: + print(e.stdout) + raise + + +def cleanup(): + for raid in raids: + subprocess.run( + ['mdadm', '--verbose', '--stop', raid]) + for loopdev in loopdevs: + subprocess.run( + ['losetup', '-d', loopdev]) + shutil.rmtree(tmpdir) + + +def create_devices_for_sizes(sizes): + devs = [] + for size in sizes: + fd, name = tempfile.mkstemp(dir=tmpdir) + os.ftruncate(fd, size) + os.close(fd) + dev = subprocess.run( + ['losetup', '-f', '--show', name], + stdout=subprocess.PIPE, encoding='ascii').stdout.strip() + devs.append(dev) + loopdevs.append(dev) + return devs + + +def create_raid(level, images): + name = '/dev/md/{}'.format(uuid.uuid4()) + 
cmd = [ + 'mdadm', + '--verbose', + '--create', + '--metadata', 'default', + '--level', level, + '-n', str(len(images)), + '--assume-clean', + name, + ] + images + run(cmd) + raids.append(name) + return name + + +def get_real_raid_size(raid): + return int(subprocess.run( + ['blockdev', '--getsize64', raid], + stdout=subprocess.PIPE, encoding='ascii').stdout.strip()) + + +@attr.s +class FakeDev: + size = attr.ib() + + +def verify_size_ok(level, sizes): + devs = create_devices_for_sizes(sizes) + raid = create_raid(level, devs) + devs = [FakeDev(size) for size in sizes] + calc_size = get_raid_size(level, devs) + real_size = get_real_raid_size(raid) + if len(set(sizes)) == 1: + sz = '[{}]*{}'.format(humanize_size(sizes[0]), len(sizes)) + else: + sz = str([humanize_size(s) for s in sizes]) + print("level {} sizes {} -> calc_size {} real_size {}".format( + level, sz , calc_size, real_size), end=' ') + if calc_size > real_size: + print("BAAAAAAAAAAAD", real_size - calc_size) + r = False + else: + print("OK by", real_size - calc_size) + r = True + run(['mdadm', '--verbose', '--stop', raid]) + raids.remove(raid) + return r + + +fails = 0 +for size in '1G', '10G', '100G', '1T', '10T', '100T': + size = dehumanize_size(size) + for level in raidlevels: + for count in range(2, 10): + if count >= level.min_devices: + if not verify_size_ok(level.value, [size]*count): + fails += 1 + +if fails > 0: + print("{} fails".format(fails)) + sys.exit(1) +else: + print("all ok!!") diff --git a/subiquity/models/filesystem.py b/subiquity/models/filesystem.py index f47319ff..9a66bfd9 100644 --- a/subiquity/models/filesystem.py +++ b/subiquity/models/filesystem.py @@ -239,10 +239,11 @@ def round_raid_size(min_size): return min_size - data_offset +# This this is tested against reality in ./scripts/get-raid-sizes.py def get_raid_size(level, devices): - min_size = round_raid_size(min(dev.size for dev in devices)) if len(devices) == 0: return 0 + min_size = round_raid_size(min(dev.size for dev in devices)) if min_size <= 0: return 0 if level == "raid0": From 5da663fedee9b7ee6ca9faad7fa112c53ca4ce79 Mon Sep 17 00:00:00 2001 From: Michael Hudson-Doyle Date: Thu, 7 Nov 2019 11:04:03 +1300 Subject: [PATCH 04/11] raid-size-tests.py: run in tmpfs, clean up better ext4 limits files to a measly 16TiB! 
--- scripts/raid-size-tests.py | 78 +++++++++++++++++++++----------------- 1 file changed, 43 insertions(+), 35 deletions(-) diff --git a/scripts/raid-size-tests.py b/scripts/raid-size-tests.py index efe50ea4..725a4b46 100644 --- a/scripts/raid-size-tests.py +++ b/scripts/raid-size-tests.py @@ -20,11 +20,6 @@ from subiquity.models.filesystem import ( tmpdir = tempfile.mkdtemp() -raids = [] -loopdevs = [] - -_cmdoutput = [] - def run(cmd): try: subprocess.run( @@ -35,15 +30,23 @@ def run(cmd): print(e.stdout) raise +raids = [] +loopdevs = [] -def cleanup(): +def cleanraids(): for raid in raids: - subprocess.run( - ['mdadm', '--verbose', '--stop', raid]) + run(['mdadm', '--verbose', '--stop', raid]) + del raids[:] + +def cleanloops(): for loopdev in loopdevs: subprocess.run( ['losetup', '-d', loopdev]) - shutil.rmtree(tmpdir) + del loopdevs[:] + +def cleanup(): + cleanraids() + cleanloops() def create_devices_for_sizes(sizes): @@ -89,36 +92,41 @@ class FakeDev: def verify_size_ok(level, sizes): - devs = create_devices_for_sizes(sizes) - raid = create_raid(level, devs) - devs = [FakeDev(size) for size in sizes] - calc_size = get_raid_size(level, devs) - real_size = get_real_raid_size(raid) - if len(set(sizes)) == 1: - sz = '[{}]*{}'.format(humanize_size(sizes[0]), len(sizes)) - else: - sz = str([humanize_size(s) for s in sizes]) - print("level {} sizes {} -> calc_size {} real_size {}".format( - level, sz , calc_size, real_size), end=' ') - if calc_size > real_size: - print("BAAAAAAAAAAAD", real_size - calc_size) - r = False - else: - print("OK by", real_size - calc_size) - r = True - run(['mdadm', '--verbose', '--stop', raid]) - raids.remove(raid) + r = False + try: + devs = create_devices_for_sizes(sizes) + raid = create_raid(level, devs) + devs = [FakeDev(size) for size in sizes] + calc_size = get_raid_size(level, devs) + real_size = get_real_raid_size(raid) + if len(set(sizes)) == 1: + sz = '[{}]*{}'.format(humanize_size(sizes[0]), len(sizes)) + else: + sz = str([humanize_size(s) for s in sizes]) + print("level {} sizes {} -> calc_size {} real_size {}".format( + level, sz , calc_size, real_size), end=' ') + if calc_size > real_size: + print("BAAAAAAAAAAAD", real_size - calc_size) + else: + print("OK by", real_size - calc_size) + r = True + finally: + cleanup() return r fails = 0 -for size in '1G', '10G', '100G', '1T', '10T', '100T': - size = dehumanize_size(size) - for level in raidlevels: - for count in range(2, 10): - if count >= level.min_devices: - if not verify_size_ok(level.value, [size]*count): - fails += 1 +run(['mount', '-t', 'tmpfs', 'tmpfs', tmpdir]) +try: + for size in '1G', '10G', '100G', '1T', '10T', '100T': + size = dehumanize_size(size) + for level in raidlevels: + for count in range(2, 10): + if count >= level.min_devices: + if not verify_size_ok(level.value, [size]*count): + fails += 1 +finally: + run(['umount', tmpdir]) if fails > 0: print("{} fails".format(fails)) From 6b5e7adcf638a5e0c978c63ef82b9b67b1cdabd0 Mon Sep 17 00:00:00 2001 From: Michael Hudson-Doyle Date: Thu, 7 Nov 2019 11:31:23 +1300 Subject: [PATCH 05/11] add some explanation to raid-size-tests.py --- scripts/raid-size-tests.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/scripts/raid-size-tests.py b/scripts/raid-size-tests.py index 725a4b46..45d1754b 100644 --- a/scripts/raid-size-tests.py +++ b/scripts/raid-size-tests.py @@ -1,5 +1,12 @@ #!/usr/bin/python3 +# The fine details of how big a RAID device ends up as a function of the sizes +# of its components is somewhat hairier 
than one might think, with a certain +# fraction of each component device being given over to metadata storage. This +# script tests the estimates subiquity uses against reality by creating actual +# raid devices (backed by sparse files in a tmpfs) and comparing their sizes +# with the estimates. It must be run as root. + import atexit import os import shutil @@ -126,7 +133,7 @@ try: if not verify_size_ok(level.value, [size]*count): fails += 1 finally: - run(['umount', tmpdir]) + run(['umount', '-l', tmpdir]) if fails > 0: print("{} fails".format(fails)) From 233965b3764716812e7066c69af3778708a7344b Mon Sep 17 00:00:00 2001 From: Michael Hudson-Doyle Date: Thu, 7 Nov 2019 12:56:13 +1300 Subject: [PATCH 06/11] fix raid size calculation when sizes of devices in array vary It turns out mdadm computes the headroom based on the first device in the array, which means that the order of the devices matters! --- scripts/raid-size-tests.py | 12 +++++- subiquity/models/filesystem.py | 47 ++++++++++++++++------- subiquity/models/tests/test_filesystem.py | 13 ++++++- 3 files changed, 56 insertions(+), 16 deletions(-) diff --git a/scripts/raid-size-tests.py b/scripts/raid-size-tests.py index 45d1754b..7b39cf72 100644 --- a/scripts/raid-size-tests.py +++ b/scripts/raid-size-tests.py @@ -9,6 +9,7 @@ import atexit import os +import random import shutil import subprocess import sys @@ -18,6 +19,7 @@ import uuid import attr from subiquity.models.filesystem import ( + align_down, dehumanize_size, get_raid_size, humanize_size, @@ -71,13 +73,14 @@ def create_devices_for_sizes(sizes): def create_raid(level, images): - name = '/dev/md/{}'.format(uuid.uuid4()) + name = '/dev/md/test-{}'.format(uuid.uuid4()) cmd = [ 'mdadm', '--verbose', '--create', '--metadata', 'default', '--level', level, + '--run', '-n', str(len(images)), '--assume-clean', name, @@ -114,6 +117,8 @@ def verify_size_ok(level, sizes): level, sz , calc_size, real_size), end=' ') if calc_size > real_size: print("BAAAAAAAAAAAD", real_size - calc_size) + print(raid) + input('waiting: ') else: print("OK by", real_size - calc_size) r = True @@ -132,6 +137,11 @@ try: if count >= level.min_devices: if not verify_size_ok(level.value, [size]*count): fails += 1 + if not verify_size_ok(level.value, [align_down(random.randrange(size, 10*size))]*count): + fails += 1 + sizes = [align_down(random.randrange(size, 10*size)) for _ in range(count)] + if not verify_size_ok(level.value, sizes): + fails += 1 finally: run(['umount', '-l', tmpdir]) diff --git a/subiquity/models/filesystem.py b/subiquity/models/filesystem.py index 9a66bfd9..ab89878d 100644 --- a/subiquity/models/filesystem.py +++ b/subiquity/models/filesystem.py @@ -218,36 +218,57 @@ def dehumanize_size(size): return num * mult // div -def round_raid_size(min_size): +DEFAULT_CHUNK = 512 + + +def calculate_data_offset(devsize): + devsize >>= 9 # convert to sectors + + devsize = align_down(devsize, DEFAULT_CHUNK) # The calculation of how much of a device mdadm uses for raid is a # touch ridiculous. What follows is a translation of the code at: # https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/tree/super1.c?h=mdadm-4.1&id=20e8fe52e7190b3ffda127566852eac2eb7fa1f7#n2770 - # (note that that calculation is in terms of 512-byte sectors and - # this one is in bytes). + # (note that that calculations are in terms of 512-byte sectors). # # This makes assumptions about the defaults mdadm uses but mostly # that the default metadata version is 1.2, and other formats use # less space. 
- bmspace = 128*1024 - headroom = 128*1024*1024 - while (headroom << 10) > min_size and headroom > 2*1024*1024: + + # conversion of choose_bm_space: + if devsize < 64*2: + bmspace = 0 + elif devsize - 64*2 >= 200*1024*1024*2: + bmspace = 128*2 + elif devsize - 4*2 > 8*1024*1024*2: + bmspace = 64*2 + else: + bmspace = 4*2 + + headroom = 128*1024*2 + while (headroom << 10) > devsize and headroom / 2 >= DEFAULT_CHUNK*2*2: headroom >>= 1 - # mdadm's Create() can round things a little more so, to be - # pessimistic, assume another megabyte gets wasted somewhere. - data_offset = align_up(12*1024 + bmspace + headroom) + 1024*1024 - log.debug("get_raid_size: adjusting for %s bytes of overhead") - return min_size - data_offset + + data_offset = 12*2 + bmspace + headroom + log.debug( + "get_raid_size: adjusting for %s sectors of overhead", data_offset) + data_offset = align_up(data_offset, 2*1024) + + data_offset <<= 9 # convert back to bytes + + return data_offset # This this is tested against reality in ./scripts/get-raid-sizes.py def get_raid_size(level, devices): if len(devices) == 0: return 0 - min_size = round_raid_size(min(dev.size for dev in devices)) + data_offset = calculate_data_offset(devices[0].size) + sizes = [align_down(dev.size - data_offset) for dev in devices] + min_size = min(sizes) if min_size <= 0: return 0 if level == "raid0": - return min_size * len(devices) + return sum(sizes) elif level == "raid1": return min_size elif level == "raid5": diff --git a/subiquity/models/tests/test_filesystem.py b/subiquity/models/tests/test_filesystem.py index 9d8b076c..c37672c1 100644 --- a/subiquity/models/tests/test_filesystem.py +++ b/subiquity/models/tests/test_filesystem.py @@ -16,15 +16,17 @@ from collections import namedtuple import unittest +import attr + from subiquity.models.filesystem import ( Bootloader, dehumanize_size, DeviceAction, Disk, FilesystemModel, + get_raid_size, humanize_size, Partition, - round_raid_size, ) @@ -109,7 +111,14 @@ class TestDehumanizeSize(unittest.TestCase): class TestRoundRaidSize(unittest.TestCase): def test_lp1816777(self): - self.assertLessEqual(round_raid_size(500107862016), 499972571136) + + @attr.s + class FakeDev: + size = attr.ib() + + self.assertLessEqual( + get_raid_size("raid1", [FakeDev(500107862016)]*2), + 499972571136) FakeStorageInfo = namedtuple( From fde1c53141c44d9bd44bf7dd72dc4cb907992650 Mon Sep 17 00:00:00 2001 From: Michael Hudson-Doyle Date: Thu, 7 Nov 2019 13:25:58 +1300 Subject: [PATCH 07/11] sort raid devices consistently when computing size and when serializing --- scripts/raid-size-tests.py | 11 +++-------- subiquity/models/filesystem.py | 12 ++++++++++++ subiquity/models/tests/test_filesystem.py | 12 ++++++++---- 3 files changed, 23 insertions(+), 12 deletions(-) diff --git a/scripts/raid-size-tests.py b/scripts/raid-size-tests.py index 7b39cf72..0a28d985 100644 --- a/scripts/raid-size-tests.py +++ b/scripts/raid-size-tests.py @@ -7,16 +7,13 @@ # raid devices (backed by sparse files in a tmpfs) and comparing their sizes # with the estimates. It must be run as root. 
-import atexit import os import random -import shutil import subprocess import sys import tempfile import uuid -import attr from subiquity.models.filesystem import ( align_down, @@ -25,6 +22,9 @@ from subiquity.models.filesystem import ( humanize_size, raidlevels, ) +from subiquity.models.tests.test_filesystem import ( + FakeDev, + ) tmpdir = tempfile.mkdtemp() @@ -96,11 +96,6 @@ def get_real_raid_size(raid): stdout=subprocess.PIPE, encoding='ascii').stdout.strip()) -@attr.s -class FakeDev: - size = attr.ib() - - def verify_size_ok(level, sizes): r = False try: diff --git a/subiquity/models/filesystem.py b/subiquity/models/filesystem.py index ab89878d..1f06c008 100644 --- a/subiquity/models/filesystem.py +++ b/subiquity/models/filesystem.py @@ -258,10 +258,15 @@ def calculate_data_offset(devsize): return data_offset +def raid_device_sort(devices): + return sorted(devices, key=lambda d: d.id) + + # This this is tested against reality in ./scripts/get-raid-sizes.py def get_raid_size(level, devices): if len(devices) == 0: return 0 + devices = raid_device_sort(devices) data_offset = calculate_data_offset(devices[0].size) sizes = [align_down(dev.size - data_offset) for dev in devices] min_size = min(sizes) @@ -893,6 +898,13 @@ class Raid(_Device): name = attr.ib() raidlevel = attr.ib(converter=lambda x: raidlevels_by_value[x].value) devices = attributes.reflist(backlink="_constructed_device") + + def serialize_devices(self): + # Surprisingly, the order of devices passed to mdadm --create + # matters (see get_raid_size) so we sort devices here the same + # way get_raid_size does. + return [d.id for d in raid_device_sort(self.devices)] + spare_devices = attributes.reflist(backlink="_constructed_device") preserve = attr.ib(default=False) diff --git a/subiquity/models/tests/test_filesystem.py b/subiquity/models/tests/test_filesystem.py index c37672c1..af854468 100644 --- a/subiquity/models/tests/test_filesystem.py +++ b/subiquity/models/tests/test_filesystem.py @@ -19,6 +19,7 @@ import unittest import attr from subiquity.models.filesystem import ( + attributes, Bootloader, dehumanize_size, DeviceAction, @@ -108,14 +109,17 @@ class TestDehumanizeSize(unittest.TestCase): self.assertEqual(expected_error, actual_error) +@attr.s +class FakeDev: + + size = attr.ib() + id = attributes.idfield("fakedev") + + class TestRoundRaidSize(unittest.TestCase): def test_lp1816777(self): - @attr.s - class FakeDev: - size = attr.ib() - self.assertLessEqual( get_raid_size("raid1", [FakeDev(500107862016)]*2), 499972571136) From d5326487945fc520c9abb19b2cb49d788f321ec0 Mon Sep 17 00:00:00 2001 From: Michael Hudson-Doyle Date: Fri, 8 Nov 2019 09:31:49 +1300 Subject: [PATCH 08/11] beef up comments --- subiquity/models/filesystem.py | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/subiquity/models/filesystem.py b/subiquity/models/filesystem.py index 1f06c008..bf6a7f74 100644 --- a/subiquity/models/filesystem.py +++ b/subiquity/models/filesystem.py @@ -221,18 +221,24 @@ def dehumanize_size(size): DEFAULT_CHUNK = 512 +# The calculation of how much of a device mdadm uses for raid is more than a +# touch ridiculous. What follows is a translation of the code at: +# https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/tree/super1.c, +# specifically choose_bm_space and the end of validate_geometry1. Note that +# that calculations are in terms of 512-byte sectors. 
+# +# We make some assumptions about the defaults mdadm uses but mostly that the +# default metadata version is 1.2, and other formats use less space. +# +# Note that data_offset is computed for the first disk mdadm examines and then +# used for all devices, so the order matters! (Well, if the size of the +# devices vary, which is not normal but also not something we prevent). +# +# All this is tested against reality in ./scripts/get-raid-sizes.py def calculate_data_offset(devsize): devsize >>= 9 # convert to sectors devsize = align_down(devsize, DEFAULT_CHUNK) - # The calculation of how much of a device mdadm uses for raid is a - # touch ridiculous. What follows is a translation of the code at: - # https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/tree/super1.c?h=mdadm-4.1&id=20e8fe52e7190b3ffda127566852eac2eb7fa1f7#n2770 - # (note that that calculations are in terms of 512-byte sectors). - # - # This makes assumptions about the defaults mdadm uses but mostly - # that the default metadata version is 1.2, and other formats use - # less space. # conversion of choose_bm_space: if devsize < 64*2: @@ -244,6 +250,7 @@ def calculate_data_offset(devsize): else: bmspace = 4*2 + # From the end of validate_geometry1, assuming metadata 1.2. headroom = 128*1024*2 while (headroom << 10) > devsize and headroom / 2 >= DEFAULT_CHUNK*2*2: headroom >>= 1 @@ -259,10 +266,13 @@ def calculate_data_offset(devsize): def raid_device_sort(devices): + # Because the device order matters to mdadm, we sort consistently but + # arbitrarily when computing the size and when rendering the config (so + # curtin passes the devices to mdadm in the order we calculate the size + # for) return sorted(devices, key=lambda d: d.id) -# This this is tested against reality in ./scripts/get-raid-sizes.py def get_raid_size(level, devices): if len(devices) == 0: return 0 From 5d8c9834480f11f438c1265a38bf2c27c95f89ea Mon Sep 17 00:00:00 2001 From: Michael Hudson-Doyle Date: Thu, 7 Nov 2019 14:21:47 +1300 Subject: [PATCH 09/11] make raid-size-tests.py a tiny bit friendlier --- scripts/raid-size-tests.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/scripts/raid-size-tests.py b/scripts/raid-size-tests.py index 0a28d985..4eb20693 100644 --- a/scripts/raid-size-tests.py +++ b/scripts/raid-size-tests.py @@ -112,10 +112,14 @@ def verify_size_ok(level, sizes): level, sz , calc_size, real_size), end=' ') if calc_size > real_size: print("BAAAAAAAAAAAD", real_size - calc_size) - print(raid) - input('waiting: ') + if os.environ.get('DEBUG'): + print(raid) + input('waiting: ') + elif calc_size == real_size: + print("exactly right!") + r = True else: - print("OK by", real_size - calc_size) + print("underestimated size by", real_size - calc_size) r = True finally: cleanup() From 65a961be1ac0e80a7fb83c0a72cd3aeebf4aac11 Mon Sep 17 00:00:00 2001 From: Michael Hudson-Doyle Date: Fri, 8 Nov 2019 09:11:27 +1300 Subject: [PATCH 10/11] do not run the 100T raid tests, mdadm fails with SIGPIPE --- scripts/raid-size-tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/raid-size-tests.py b/scripts/raid-size-tests.py index 4eb20693..882bc7af 100644 --- a/scripts/raid-size-tests.py +++ b/scripts/raid-size-tests.py @@ -129,7 +129,7 @@ def verify_size_ok(level, sizes): fails = 0 run(['mount', '-t', 'tmpfs', 'tmpfs', tmpdir]) try: - for size in '1G', '10G', '100G', '1T', '10T', '100T': + for size in '1G', '10G', '100G', '1T', '10T': size = dehumanize_size(size) for level in raidlevels: for count in 
range(2, 10): From 0141defa017333ed135fdcee3b612ae510f36736 Mon Sep 17 00:00:00 2001 From: Michael Hudson-Doyle Date: Fri, 8 Nov 2019 09:47:34 +1300 Subject: [PATCH 11/11] changes suggested in review --- scripts/raid-size-tests.py | 2 +- subiquity/models/filesystem.py | 13 +++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/scripts/raid-size-tests.py b/scripts/raid-size-tests.py index 882bc7af..28e237c9 100644 --- a/scripts/raid-size-tests.py +++ b/scripts/raid-size-tests.py @@ -119,7 +119,7 @@ def verify_size_ok(level, sizes): print("exactly right!") r = True else: - print("underestimated size by", real_size - calc_size) + print("subiquity wasted space", real_size - calc_size) r = True finally: cleanup() diff --git a/subiquity/models/filesystem.py b/subiquity/models/filesystem.py index bf6a7f74..82bb4291 100644 --- a/subiquity/models/filesystem.py +++ b/subiquity/models/filesystem.py @@ -235,8 +235,10 @@ DEFAULT_CHUNK = 512 # devices vary, which is not normal but also not something we prevent). # # All this is tested against reality in ./scripts/get-raid-sizes.py -def calculate_data_offset(devsize): - devsize >>= 9 # convert to sectors +def calculate_data_offset_bytes(devsize): + # Convert to sectors to make it easier to compare this code to mdadm's (we + # convert back at the end) + devsize >>= 9 devsize = align_down(devsize, DEFAULT_CHUNK) @@ -260,9 +262,8 @@ def calculate_data_offset(devsize): "get_raid_size: adjusting for %s sectors of overhead", data_offset) data_offset = align_up(data_offset, 2*1024) - data_offset <<= 9 # convert back to bytes - - return data_offset + # convert back to bytes + return data_offset << 9 def raid_device_sort(devices): @@ -277,7 +278,7 @@ def get_raid_size(level, devices): if len(devices) == 0: return 0 devices = raid_device_sort(devices) - data_offset = calculate_data_offset(devices[0].size) + data_offset = calculate_data_offset_bytes(devices[0].size) sizes = [align_down(dev.size - data_offset) for dev in devices] min_size = min(sizes) if min_size <= 0:
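
A worked cross-check of the arithmetic the series converges on (the patch 11 versions of calculate_data_offset_bytes, raid_device_sort and get_raid_size). The sketch below is a standalone transcription, not subiquity code as-is: align_down and align_up are reimplemented here on the assumption that subiquity defaults both to 1MiB alignment, and FakeDev is a simplified stand-in for the test helper with just the .size and .id attributes the calculation touches. The 500107862016-byte device is the disk from LP: #1816777, and 499972571136 is the bound asserted by test_lp1816777.

#!/usr/bin/python3

import attr

DEFAULT_CHUNK = 512


def align_down(size, block_size=1 << 20):
    # Assumed 1MiB default alignment, as in subiquity/models/filesystem.py.
    return size & ~(block_size - 1)


def align_up(size, block_size=1 << 20):
    return align_down(size + block_size - 1, block_size)


def calculate_data_offset_bytes(devsize):
    devsize >>= 9  # bytes -> 512-byte sectors, to match mdadm's code
    devsize = align_down(devsize, DEFAULT_CHUNK)
    # conversion of choose_bm_space:
    if devsize < 64*2:
        bmspace = 0
    elif devsize - 64*2 >= 200*1024*1024*2:
        bmspace = 128*2
    elif devsize - 4*2 > 8*1024*1024*2:
        bmspace = 64*2
    else:
        bmspace = 4*2
    # from the end of validate_geometry1, assuming metadata 1.2:
    headroom = 128*1024*2
    while (headroom << 10) > devsize and headroom / 2 >= DEFAULT_CHUNK*2*2:
        headroom >>= 1
    data_offset = align_up(12*2 + bmspace + headroom, 2*1024)
    return data_offset << 9  # sectors -> bytes


@attr.s
class FakeDev:
    size = attr.ib()
    id = attr.ib(default="fakedev")


def raid_device_sort(devices):
    # mdadm derives the data offset from the first device it examines, so
    # order matters; sorting keeps the size calculation and the serialized
    # config in sync.
    return sorted(devices, key=lambda d: d.id)


def get_raid_size(level, devices):
    if len(devices) == 0:
        return 0
    devices = raid_device_sort(devices)
    data_offset = calculate_data_offset_bytes(devices[0].size)
    sizes = [align_down(dev.size - data_offset) for dev in devices]
    min_size = min(sizes)
    if min_size <= 0:
        return 0
    if level == "raid0":
        return sum(sizes)
    elif level == "raid1":
        return min_size
    elif level == "raid5":
        return min_size * (len(devices) - 1)
    elif level == "raid6":
        return min_size * (len(devices) - 2)
    elif level == "raid10":
        return min_size * (len(devices) // 2)
    raise ValueError("unknown level %s" % level)


# The disk from LP: #1816777 (a "500GB" drive, 500107862016 bytes): the
# computed data offset is 129MiB per device, and a 2-disk raid1 comes out
# at exactly the bound test_lp1816777 asserts.
offset = calculate_data_offset_bytes(500107862016)
assert offset == 129 * (1 << 20)
assert get_raid_size("raid1", [FakeDev(500107862016)]*2) == 499972571136

# With unequal devices the offset depends on which disk is looked at first
# (65MiB for a 100GiB disk vs 129MiB for a 1TiB disk), which is why the
# sort has to happen both when computing the size and when rendering the
# config that curtin hands to mdadm.
small, big = FakeDev(100 << 30, "a"), FakeDev(1 << 40, "b")
assert calculate_data_offset_bytes(small.size) == 65 * (1 << 20)
assert calculate_data_offset_bytes(big.size) == 129 * (1 << 20)
assert get_raid_size("raid1", [small, big]) == \
    get_raid_size("raid1", [big, small])

Running this script to completion (no assertion failures) reproduces the numbers the series' tests rely on; scripts/raid-size-tests.py remains the authoritative check against mdadm itself.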