fix raid size calculation when sizes of devices in array vary

It turns out mdadm computes the headroom based on the first device in
the array, which means that the order of the devices matters!
This commit is contained in:
Michael Hudson-Doyle 2019-11-07 12:56:13 +13:00
parent 6b5e7adcf6
commit 233965b376
3 changed files with 56 additions and 16 deletions

View File

@ -9,6 +9,7 @@
import atexit import atexit
import os import os
import random
import shutil import shutil
import subprocess import subprocess
import sys import sys
@ -18,6 +19,7 @@ import uuid
import attr import attr
from subiquity.models.filesystem import ( from subiquity.models.filesystem import (
align_down,
dehumanize_size, dehumanize_size,
get_raid_size, get_raid_size,
humanize_size, humanize_size,
@ -71,13 +73,14 @@ def create_devices_for_sizes(sizes):
def create_raid(level, images): def create_raid(level, images):
name = '/dev/md/{}'.format(uuid.uuid4()) name = '/dev/md/test-{}'.format(uuid.uuid4())
cmd = [ cmd = [
'mdadm', 'mdadm',
'--verbose', '--verbose',
'--create', '--create',
'--metadata', 'default', '--metadata', 'default',
'--level', level, '--level', level,
'--run',
'-n', str(len(images)), '-n', str(len(images)),
'--assume-clean', '--assume-clean',
name, name,
@ -114,6 +117,8 @@ def verify_size_ok(level, sizes):
level, sz , calc_size, real_size), end=' ') level, sz , calc_size, real_size), end=' ')
if calc_size > real_size: if calc_size > real_size:
print("BAAAAAAAAAAAD", real_size - calc_size) print("BAAAAAAAAAAAD", real_size - calc_size)
print(raid)
input('waiting: ')
else: else:
print("OK by", real_size - calc_size) print("OK by", real_size - calc_size)
r = True r = True
@ -132,6 +137,11 @@ try:
if count >= level.min_devices: if count >= level.min_devices:
if not verify_size_ok(level.value, [size]*count): if not verify_size_ok(level.value, [size]*count):
fails += 1 fails += 1
if not verify_size_ok(level.value, [align_down(random.randrange(size, 10*size))]*count):
fails += 1
sizes = [align_down(random.randrange(size, 10*size)) for _ in range(count)]
if not verify_size_ok(level.value, sizes):
fails += 1
finally: finally:
run(['umount', '-l', tmpdir]) run(['umount', '-l', tmpdir])

View File

@ -218,36 +218,57 @@ def dehumanize_size(size):
return num * mult // div return num * mult // div
def round_raid_size(min_size): DEFAULT_CHUNK = 512
def calculate_data_offset(devsize):
devsize >>= 9 # convert to sectors
devsize = align_down(devsize, DEFAULT_CHUNK)
# The calculation of how much of a device mdadm uses for raid is a # The calculation of how much of a device mdadm uses for raid is a
# touch ridiculous. What follows is a translation of the code at: # touch ridiculous. What follows is a translation of the code at:
# https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/tree/super1.c?h=mdadm-4.1&id=20e8fe52e7190b3ffda127566852eac2eb7fa1f7#n2770 # https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/tree/super1.c?h=mdadm-4.1&id=20e8fe52e7190b3ffda127566852eac2eb7fa1f7#n2770
# (note that that calculation is in terms of 512-byte sectors and # (note that those calculations are in terms of 512-byte sectors).
# this one is in bytes).
# #
# This makes assumptions about the defaults mdadm uses but mostly # This makes assumptions about the defaults mdadm uses but mostly
# that the default metadata version is 1.2, and other formats use # that the default metadata version is 1.2, and other formats use
# less space. # less space.
bmspace = 128*1024
headroom = 128*1024*1024 # conversion of choose_bm_space:
while (headroom << 10) > min_size and headroom > 2*1024*1024: if devsize < 64*2:
bmspace = 0
elif devsize - 64*2 >= 200*1024*1024*2:
bmspace = 128*2
elif devsize - 4*2 > 8*1024*1024*2:
bmspace = 64*2
else:
bmspace = 4*2
headroom = 128*1024*2
while (headroom << 10) > devsize and headroom / 2 >= DEFAULT_CHUNK*2*2:
headroom >>= 1 headroom >>= 1
# mdadm's Create() can round things a little more so, to be
# pessimistic, assume another megabyte gets wasted somewhere. data_offset = 12*2 + bmspace + headroom
data_offset = align_up(12*1024 + bmspace + headroom) + 1024*1024 log.debug(
log.debug("get_raid_size: adjusting for %s bytes of overhead") "get_raid_size: adjusting for %s sectors of overhead", data_offset)
return min_size - data_offset data_offset = align_up(data_offset, 2*1024)
data_offset <<= 9 # convert back to bytes
return data_offset
# This is tested against reality in ./scripts/get-raid-sizes.py # This is tested against reality in ./scripts/get-raid-sizes.py
def get_raid_size(level, devices): def get_raid_size(level, devices):
if len(devices) == 0: if len(devices) == 0:
return 0 return 0
min_size = round_raid_size(min(dev.size for dev in devices)) data_offset = calculate_data_offset(devices[0].size)
sizes = [align_down(dev.size - data_offset) for dev in devices]
min_size = min(sizes)
if min_size <= 0: if min_size <= 0:
return 0 return 0
if level == "raid0": if level == "raid0":
return min_size * len(devices) return sum(sizes)
elif level == "raid1": elif level == "raid1":
return min_size return min_size
elif level == "raid5": elif level == "raid5":

View File

@ -16,15 +16,17 @@
from collections import namedtuple from collections import namedtuple
import unittest import unittest
import attr
from subiquity.models.filesystem import ( from subiquity.models.filesystem import (
Bootloader, Bootloader,
dehumanize_size, dehumanize_size,
DeviceAction, DeviceAction,
Disk, Disk,
FilesystemModel, FilesystemModel,
get_raid_size,
humanize_size, humanize_size,
Partition, Partition,
round_raid_size,
) )
@ -109,7 +111,14 @@ class TestDehumanizeSize(unittest.TestCase):
class TestRoundRaidSize(unittest.TestCase): class TestRoundRaidSize(unittest.TestCase):
def test_lp1816777(self): def test_lp1816777(self):
self.assertLessEqual(round_raid_size(500107862016), 499972571136)
@attr.s
class FakeDev:
size = attr.ib()
self.assertLessEqual(
get_raid_size("raid1", [FakeDev(500107862016)]*2),
499972571136)
FakeStorageInfo = namedtuple( FakeStorageInfo = namedtuple(