1#!/usr/bin/python3 

2# 

3# Copyright (C) Citrix Systems Inc. 

4# 

5# This program is free software; you can redistribute it and/or modify 

6# it under the terms of the GNU Lesser General Public License as published 

7# by the Free Software Foundation; version 2.1 only. 

8# 

9# This program is distributed in the hope that it will be useful, 

10# but WITHOUT ANY WARRANTY; without even the implied warranty of 

11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

12# GNU Lesser General Public License for more details. 

13# 

14# You should have received a copy of the GNU Lesser General Public License 

15# along with this program; if not, write to the Free Software Foundation, Inc., 

16# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 

17# 

18# Script to coalesce and garbage collect VHD-based SRs in the background 

19# 

20 

21import os 

22import os.path 

23import sys 

24import time 

25import signal 

26import subprocess 

27import getopt 

28import datetime 

29import traceback 

30import base64 

31import zlib 

32import errno 

33import stat 

34 

35import XenAPI # pylint: disable=import-error 

36import util 

37import lvutil 

38import vhdutil 

39import lvhdutil 

40import lvmcache 

41import journaler 

42import fjournaler 

43import lock 

44import blktap2 

45import xs_errors 

46from refcounter import RefCounter 

47from ipc import IPCFlag 

48from lvmanager import LVActivator 

49from srmetadata import LVMMetadataHandler, VDI_TYPE_TAG 

50from functools import reduce 

51from time import monotonic as _time 

52 

53try: 

54 from linstorjournaler import LinstorJournaler 

55 from linstorvhdutil import LinstorVhdUtil 

56 from linstorvolumemanager import get_controller_uri 

57 from linstorvolumemanager import LinstorVolumeManager 

58 from linstorvolumemanager import LinstorVolumeManagerError 

59 from linstorvolumemanager import PERSISTENT_PREFIX as LINSTOR_PERSISTENT_PREFIX 

60 

61 LINSTOR_AVAILABLE = True 

62except ImportError: 

63 LINSTOR_AVAILABLE = False 

64 

65# Controls automatic online leaf-coalescing (set to False to disable it, e.g. 

66# if lvhd_stop_using_() is not working correctly). Independently of this 

67# setting, leaf-coalesce remains available through the explicit LEAFCLSC_FORCE 

68# flag in the VDI record for use by the offline tool (which makes the 

69# operation safe by pausing the VM first) 

70AUTO_ONLINE_LEAF_COALESCE_ENABLED = True 

71 

72FLAG_TYPE_ABORT = "abort" # flag to request aborting of GC/coalesce 

73 

74# process "lock", used simply as an indicator that a process already exists 

75# that is doing GC/coalesce on this SR (such a process holds the lock, and we 

76# check for the fact by trying the lock). 

77LOCK_TYPE_RUNNING = "running" 

78lockRunning = None 

79 

80# process "lock" to indicate that the GC process has been activated but may not 

81# yet be running; it stops a second process from being started. 

82LOCK_TYPE_GC_ACTIVE = "gc_active" 

83lockActive = None 

84 

85# Default coalesce error rate limit, in messages per minute. A zero value 

86# disables throttling, and a negative value disables error reporting. 

87DEFAULT_COALESCE_ERR_RATE = 1.0 / 60 

88 

89COALESCE_LAST_ERR_TAG = 'last-coalesce-error' 

90COALESCE_ERR_RATE_TAG = 'coalesce-error-rate' 

91VAR_RUN = "/var/run/" 

92SPEED_LOG_ROOT = VAR_RUN + "{uuid}.speed_log" 

93 

94N_RUNNING_AVERAGE = 10 

95 

96NON_PERSISTENT_DIR = '/run/nonpersistent/sm' 

97 

98 

99class AbortException(util.SMException): 

100 pass 

101 

102 

103################################################################################ 

104# 

105# Util 

106# 

107class Util: 

108 RET_RC = 1 

109 RET_STDOUT = 2 

110 RET_STDERR = 4 

111 

112 UUID_LEN = 36 

113 

114 PREFIX = {"G": 1024 * 1024 * 1024, "M": 1024 * 1024, "K": 1024} 

115 

116 def log(text): 

117 util.SMlog(text, ident="SMGC") 

118 log = staticmethod(log) 

119 

120 def logException(tag): 

121 info = sys.exc_info() 

122 if info[0] == SystemExit: 

123 # this should not be happening when catching "Exception", but it is 

124 sys.exit(0) 

125 tb = reduce(lambda a, b: "%s%s" % (a, b), traceback.format_tb(info[2])) 

126 Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*") 

127 Util.log(" ***********************") 

128 Util.log(" * E X C E P T I O N *") 

129 Util.log(" ***********************") 

130 Util.log("%s: EXCEPTION %s, %s" % (tag, info[0], info[1])) 

131 Util.log(tb) 

132 Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*") 

133 logException = staticmethod(logException) 

134 

135 def doexec(args, expectedRC, inputtext=None, ret=None, log=True): 

136 "Execute a subprocess, then return its return code, stdout, stderr" 

137 proc = subprocess.Popen(args, 

138 stdin=subprocess.PIPE, \ 

139 stdout=subprocess.PIPE, \ 

140 stderr=subprocess.PIPE, \ 

141 shell=True, \ 

142 close_fds=True) 

143 (stdout, stderr) = proc.communicate(inputtext) 

144 stdout = str(stdout) 

145 stderr = str(stderr) 

146 rc = proc.returncode 

147 if log: 

148 Util.log("`%s`: %s" % (args, rc)) 

149 if type(expectedRC) != type([]): 

150 expectedRC = [expectedRC] 

151 if not rc in expectedRC: 

152 reason = stderr.strip() 

153 if stdout.strip(): 

154 reason = "%s (stdout: %s)" % (reason, stdout.strip()) 

155 Util.log("Failed: %s" % reason) 

156 raise util.CommandException(rc, args, reason) 

157 

158 if ret == Util.RET_RC: 

159 return rc 

160 if ret == Util.RET_STDERR: 

161 return stderr 

162 return stdout 

163 doexec = staticmethod(doexec) 
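
# Usage note: doexec() runs with shell=True, so callers pass the command as a
# single string (illustrative paths, mirroring _runTapdiskDiff below):
#     out = Util.doexec("tapdisk-diff -n vhd:/a.vhd -m vhd:/b.vhd", 0)
# expectedRC may be a single code or a list of acceptable codes; any other
# return code raises util.CommandException with stderr/stdout as the reason.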

164 

165 def runAbortable(func, ret, ns, abortTest, pollInterval, timeOut): 

166 """execute func in a separate thread and kill it if abortTest signals 

167 so""" 

168 abortSignaled = abortTest() # check now before we clear resultFlag 

169 resultFlag = IPCFlag(ns) 

170 resultFlag.clearAll() 

171 pid = os.fork() 

172 if pid: 

173 startTime = _time() 

174 try: 

175 while True: 

176 if resultFlag.test("success"): 

177 Util.log(" Child process completed successfully") 

178 resultFlag.clear("success") 

179 return 

180 if resultFlag.test("failure"): 

181 resultFlag.clear("failure") 

182 raise util.SMException("Child process exited with error") 

183 if abortTest() or abortSignaled: 

184 os.killpg(pid, signal.SIGKILL) 

185 raise AbortException("Aborting due to signal") 

186 if timeOut and _time() - startTime > timeOut: 

187 os.killpg(pid, signal.SIGKILL) 

188 resultFlag.clearAll() 

189 raise util.SMException("Timed out") 

190 time.sleep(pollInterval) 

191 finally: 

192 wait_pid = 0 

193 rc = -1 

194 count = 0 

195 while wait_pid == 0 and count < 10: 

196 wait_pid, rc = os.waitpid(pid, os.WNOHANG) 

197 if wait_pid == 0: 

198 time.sleep(2) 

199 count += 1 

200 

201 if wait_pid == 0: 

202 Util.log("runAbortable: wait for process completion timed out") 

203 else: 

204 os.setpgrp() 

205 try: 

206 if func() == ret: 

207 resultFlag.set("success") 

208 else: 

209 resultFlag.set("failure") 

210 except Exception as e: 

211 Util.log("Child process failed with : (%s)" % e) 

212 resultFlag.set("failure") 

213 Util.logException("This exception has occurred") 

214 os._exit(0) 

215 runAbortable = staticmethod(runAbortable) 
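
# Note on the mechanism above: the parent process polls the per-namespace
# IPCFlag for the "success"/"failure" flags set by the forked child and kills
# the child's process group on an abort request or timeout. Typical call, as
# in _coalesceVHD below:
#     Util.runAbortable(lambda: VDI._doCoalesceVHD(self), None,
#                       self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut)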

216 

217 def num2str(number): 

218 for prefix in ("G", "M", "K"): 

219 if number >= Util.PREFIX[prefix]: 

220 return "%.3f%s" % (float(number) / Util.PREFIX[prefix], prefix) 

221 return "%s" % number 

222 num2str = staticmethod(num2str) 
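
# Example: num2str(20 * 1024 * 1024) == "20.000M"; values below 1K are
# returned unformatted.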

223 

224 def numBits(val): 

225 count = 0 

226 while val: 

227 count += val & 1 

228 val = val >> 1 

229 return count 

230 numBits = staticmethod(numBits) 

231 

232 def countBits(bitmap1, bitmap2): 

233 """return bit count in the bitmap produced by ORing the two bitmaps""" 

234 len1 = len(bitmap1) 

235 len2 = len(bitmap2) 

236 lenLong = len1 

237 lenShort = len2 

238 bitmapLong = bitmap1 

239 if len2 > len1: 

240 lenLong = len2 

241 lenShort = len1 

242 bitmapLong = bitmap2 

243 

244 count = 0 

245 for i in range(lenShort): 

246 val = bitmap1[i] | bitmap2[i] 

247 count += Util.numBits(val) 

248 

249 for i in range(i + 1, lenLong): 

250 val = bitmapLong[i] 

251 count += Util.numBits(val) 

252 return count 

253 countBits = staticmethod(countBits) 
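
# Worked example (illustrative inputs): countBits(b'\x03', b'\x05\x10') ORs
# the overlapping byte (0x03 | 0x05 = 0x07, three set bits) and then counts
# the remaining byte of the longer bitmap (0x10, one set bit), returning 4.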

254 

255 def getThisScript(): 

256 thisScript = util.get_real_path(__file__) 

257 if thisScript.endswith(".pyc"): 

258 thisScript = thisScript[:-1] 

259 return thisScript 

260 getThisScript = staticmethod(getThisScript) 

261 

262 

263################################################################################ 

264# 

265# XAPI 

266# 

267class XAPI: 

268 USER = "root" 

269 PLUGIN_ON_SLAVE = "on-slave" 

270 

271 CONFIG_SM = 0 

272 CONFIG_OTHER = 1 

273 CONFIG_ON_BOOT = 2 

274 CONFIG_ALLOW_CACHING = 3 

275 

276 CONFIG_NAME = { 

277 CONFIG_SM: "sm-config", 

278 CONFIG_OTHER: "other-config", 

279 CONFIG_ON_BOOT: "on-boot", 

280 CONFIG_ALLOW_CACHING: "allow_caching" 

281 } 
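
# The kinds above are dispatched on by getConfigVDI / addToConfigVDI /
# removeFromConfigVDI below; VDI.CONFIG_TYPE (defined further down) maps each
# config key to one of these kinds.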

282 

283 class LookupError(util.SMException): 

284 pass 

285 

286 def getSession(): 

287 session = XenAPI.xapi_local() 

288 session.xenapi.login_with_password(XAPI.USER, '', '', 'SM') 

289 return session 

290 getSession = staticmethod(getSession) 

291 

292 def __init__(self, session, srUuid): 

293 self.sessionPrivate = False 

294 self.session = session 

295 if self.session is None: 

296 self.session = self.getSession() 

297 self.sessionPrivate = True 

298 self._srRef = self.session.xenapi.SR.get_by_uuid(srUuid) 

299 self.srRecord = self.session.xenapi.SR.get_record(self._srRef) 

300 self.hostUuid = util.get_this_host() 

301 self._hostRef = self.session.xenapi.host.get_by_uuid(self.hostUuid) 

302 self.task = None 

303 self.task_progress = {"coalescable": 0, "done": 0} 

304 

305 def __del__(self): 

306 if self.sessionPrivate: 

307 self.session.xenapi.session.logout() 

308 

309 def isPluggedHere(self): 

310 pbds = self.getAttachedPBDs() 

311 for pbdRec in pbds: 

312 if pbdRec["host"] == self._hostRef: 

313 return True 

314 return False 

315 

316 def poolOK(self): 

317 host_recs = self.session.xenapi.host.get_all_records() 

318 for host_ref, host_rec in host_recs.items(): 

319 if not host_rec["enabled"]: 

320 Util.log("Host %s not enabled" % host_rec["uuid"]) 

321 return False 

322 return True 

323 

324 def isMaster(self): 

325 if self.srRecord["shared"]: 

326 pool = list(self.session.xenapi.pool.get_all_records().values())[0] 

327 return pool["master"] == self._hostRef 

328 else: 

329 pbds = self.getAttachedPBDs() 

330 if len(pbds) < 1: 

331 raise util.SMException("Local SR not attached") 

332 elif len(pbds) > 1: 

333 raise util.SMException("Local SR multiply attached") 

334 return pbds[0]["host"] == self._hostRef 

335 

336 def getAttachedPBDs(self): 

337 """Return PBD records for all PBDs of this SR that are currently 

338 attached""" 

339 attachedPBDs = [] 

340 pbds = self.session.xenapi.PBD.get_all_records() 

341 for pbdRec in pbds.values(): 

342 if pbdRec["SR"] == self._srRef and pbdRec["currently_attached"]: 

343 attachedPBDs.append(pbdRec) 

344 return attachedPBDs 

345 

346 def getOnlineHosts(self): 

347 return util.get_online_hosts(self.session) 

348 

349 def ensureInactive(self, hostRef, args): 

350 text = self.session.xenapi.host.call_plugin( \ 

351 hostRef, self.PLUGIN_ON_SLAVE, "multi", args) 

352 Util.log("call-plugin returned: '%s'" % text) 

353 

354 def getRecordHost(self, hostRef): 

355 return self.session.xenapi.host.get_record(hostRef) 

356 

357 def _getRefVDI(self, uuid): 

358 return self.session.xenapi.VDI.get_by_uuid(uuid) 

359 

360 def getRefVDI(self, vdi): 

361 return self._getRefVDI(vdi.uuid) 

362 

363 def getRecordVDI(self, uuid): 

364 try: 

365 ref = self._getRefVDI(uuid) 

366 return self.session.xenapi.VDI.get_record(ref) 

367 except XenAPI.Failure: 

368 return None 

369 

370 def singleSnapshotVDI(self, vdi): 

371 return self.session.xenapi.VDI.snapshot(vdi.getRef(), 

372 {"type": "internal"}) 

373 

374 def forgetVDI(self, srUuid, vdiUuid): 

375 """Forget the VDI, but handle the case where the VDI has already been 

376 forgotten (i.e. ignore errors)""" 

377 try: 

378 vdiRef = self.session.xenapi.VDI.get_by_uuid(vdiUuid) 

379 self.session.xenapi.VDI.forget(vdiRef) 

380 except XenAPI.Failure: 

381 pass 

382 

383 def getConfigVDI(self, vdi, key): 

384 kind = vdi.CONFIG_TYPE[key] 

385 if kind == self.CONFIG_SM: 

386 cfg = self.session.xenapi.VDI.get_sm_config(vdi.getRef()) 

387 elif kind == self.CONFIG_OTHER: 

388 cfg = self.session.xenapi.VDI.get_other_config(vdi.getRef()) 

389 elif kind == self.CONFIG_ON_BOOT: 

390 cfg = self.session.xenapi.VDI.get_on_boot(vdi.getRef()) 

391 elif kind == self.CONFIG_ALLOW_CACHING: 

392 cfg = self.session.xenapi.VDI.get_allow_caching(vdi.getRef()) 

393 else: 

394 assert(False) 

395 Util.log("Got %s for %s: %s" % (self.CONFIG_NAME[kind], vdi, repr(cfg))) 

396 return cfg 

397 

398 def removeFromConfigVDI(self, vdi, key): 

399 kind = vdi.CONFIG_TYPE[key] 

400 if kind == self.CONFIG_SM: 

401 self.session.xenapi.VDI.remove_from_sm_config(vdi.getRef(), key) 

402 elif kind == self.CONFIG_OTHER: 

403 self.session.xenapi.VDI.remove_from_other_config(vdi.getRef(), key) 

404 else: 

405 assert(False) 

406 

407 def addToConfigVDI(self, vdi, key, val): 

408 kind = vdi.CONFIG_TYPE[key] 

409 if kind == self.CONFIG_SM: 

410 self.session.xenapi.VDI.add_to_sm_config(vdi.getRef(), key, val) 

411 elif kind == self.CONFIG_OTHER: 

412 self.session.xenapi.VDI.add_to_other_config(vdi.getRef(), key, val) 

413 else: 

414 assert(False) 

415 

416 def isSnapshot(self, vdi): 

417 return self.session.xenapi.VDI.get_is_a_snapshot(vdi.getRef()) 

418 

419 def markCacheSRsDirty(self): 

420 sr_refs = self.session.xenapi.SR.get_all_records_where( \ 

421 'field "local_cache_enabled" = "true"') 

422 for sr_ref in sr_refs: 

423 Util.log("Marking SR %s dirty" % sr_ref) 

424 util.set_dirty(self.session, sr_ref) 

425 

426 def srUpdate(self): 

427 Util.log("Starting asynch srUpdate for SR %s" % self.srRecord["uuid"]) 

428 abortFlag = IPCFlag(self.srRecord["uuid"]) 

429 task = self.session.xenapi.Async.SR.update(self._srRef) 

430 cancelTask = True 

431 try: 

432 for i in range(60): 

433 status = self.session.xenapi.task.get_status(task) 

434 if not status == "pending": 

435 Util.log("SR.update_asynch status changed to [%s]" % status) 

436 cancelTask = False 

437 return 

438 if abortFlag.test(FLAG_TYPE_ABORT): 

439 Util.log("Abort signalled during srUpdate, cancelling task...") 

440 try: 

441 self.session.xenapi.task.cancel(task) 

442 cancelTask = False 

443 Util.log("Task cancelled") 

444 except: 

445 pass 

446 return 

447 time.sleep(1) 

448 finally: 

449 if cancelTask: 

450 self.session.xenapi.task.cancel(task) 

451 self.session.xenapi.task.destroy(task) 

452 Util.log("Asynch srUpdate still running, but timeout exceeded.") 

453 

454 def update_task(self): 

455 self.session.xenapi.task.set_other_config( 

456 self.task, 

457 { 

458 "applies_to": self._srRef 

459 }) 

460 total = self.task_progress['coalescable'] + self.task_progress['done'] 

461 if (total > 0): 

462 self.session.xenapi.task.set_progress( 

463 self.task, float(self.task_progress['done']) / total) 

464 

465 def create_task(self, label, description): 

466 self.task = self.session.xenapi.task.create(label, description) 

467 self.update_task() 

468 

469 def update_task_progress(self, key, value): 

470 self.task_progress[key] = value 

471 if self.task: 

472 self.update_task() 

473 

474 def set_task_status(self, status): 

475 if self.task: 

476 self.session.xenapi.task.set_status(self.task, status) 

477 

478 

479################################################################################ 

480# 

481# VDI 

482# 

483class VDI(object): 

484 """Object representing a VDI of a VHD-based SR""" 

485 

486 POLL_INTERVAL = 1 

487 POLL_TIMEOUT = 30 

488 DEVICE_MAJOR = 202 

489 DRIVER_NAME_VHD = "vhd" 

490 

491 # config keys & values 

492 DB_VHD_PARENT = "vhd-parent" 

493 DB_VDI_TYPE = "vdi_type" 

494 DB_VHD_BLOCKS = "vhd-blocks" 

495 DB_VDI_PAUSED = "paused" 

496 DB_VDI_RELINKING = "relinking" 

497 DB_VDI_ACTIVATING = "activating" 

498 DB_GC = "gc" 

499 DB_COALESCE = "coalesce" 

500 DB_LEAFCLSC = "leaf-coalesce" # config key 

501 LEAFCLSC_DISABLED = "false" # set by user; means do not leaf-coalesce 

502 LEAFCLSC_FORCE = "force" # set by user; means skip snap-coalesce 

503 LEAFCLSC_OFFLINE = "offline" # set here for informational purposes: means 

504 # no space to snap-coalesce or unable to keep 

505 # up with VDI. This is not used by the SM; it 

506 # might be used by external components. 

507 DB_ONBOOT = "on-boot" 

508 ONBOOT_RESET = "reset" 

509 DB_ALLOW_CACHING = "allow_caching" 

510 

511 CONFIG_TYPE = { 

512 DB_VHD_PARENT: XAPI.CONFIG_SM, 

513 DB_VDI_TYPE: XAPI.CONFIG_SM, 

514 DB_VHD_BLOCKS: XAPI.CONFIG_SM, 

515 DB_VDI_PAUSED: XAPI.CONFIG_SM, 

516 DB_VDI_RELINKING: XAPI.CONFIG_SM, 

517 DB_VDI_ACTIVATING: XAPI.CONFIG_SM, 

518 DB_GC: XAPI.CONFIG_OTHER, 

519 DB_COALESCE: XAPI.CONFIG_OTHER, 

520 DB_LEAFCLSC: XAPI.CONFIG_OTHER, 

521 DB_ONBOOT: XAPI.CONFIG_ON_BOOT, 

522 DB_ALLOW_CACHING: XAPI.CONFIG_ALLOW_CACHING, 

523 } 

524 

525 LIVE_LEAF_COALESCE_MAX_SIZE = 20 * 1024 * 1024 # bytes 

526 LIVE_LEAF_COALESCE_TIMEOUT = 10 # seconds 

527 TIMEOUT_SAFETY_MARGIN = 0.5 # extra margin when calculating 

528 # feasibility of leaf coalesce 

529 

530 JRN_RELINK = "relink" # journal entry type for relinking children 

531 JRN_COALESCE = "coalesce" # to communicate which VDI is being coalesced 

532 JRN_LEAF = "leaf" # used in coalesce-leaf 

533 

534 STR_TREE_INDENT = 4 

535 

536 def __init__(self, sr, uuid, raw): 

537 self.sr = sr 

538 self.scanError = True 

539 self.uuid = uuid 

540 self.raw = raw 

541 self.fileName = "" 

542 self.parentUuid = "" 

543 self.sizeVirt = -1 

544 self._sizeVHD = -1 

545 self._sizeAllocated = -1 

546 self.hidden = False 

547 self.parent = None 

548 self.children = [] 

549 self._vdiRef = None 

550 self._clearRef() 

551 

552 @staticmethod 

553 def extractUuid(path): 

554 raise NotImplementedError("Implement in sub class") 

555 

556 def load(self, info=None): 

557 """Load VDI info""" 

558 pass # abstract 

559 

560 def getDriverName(self): 

561 return self.DRIVER_NAME_VHD 

562 

563 def getRef(self): 

564 if self._vdiRef is None: 

565 self._vdiRef = self.sr.xapi.getRefVDI(self) 

566 return self._vdiRef 

567 

568 def getConfig(self, key, default=None): 

569 config = self.sr.xapi.getConfigVDI(self, key) 

570 if key == self.DB_ONBOOT or key == self.DB_ALLOW_CACHING: 

571 val = config 

572 else: 

573 val = config.get(key) 

574 if val: 

575 return val 

576 return default 

577 

578 def setConfig(self, key, val): 

579 self.sr.xapi.removeFromConfigVDI(self, key) 

580 self.sr.xapi.addToConfigVDI(self, key, val) 

581 Util.log("Set %s = %s for %s" % (key, val, self)) 

582 

583 def delConfig(self, key): 

584 self.sr.xapi.removeFromConfigVDI(self, key) 

585 Util.log("Removed %s from %s" % (key, self)) 

586 

587 def ensureUnpaused(self): 

588 if self.getConfig(self.DB_VDI_PAUSED) == "true": 

589 Util.log("Unpausing VDI %s" % self) 

590 self.unpause() 

591 

592 def pause(self, failfast=False): 

593 if not blktap2.VDI.tap_pause(self.sr.xapi.session, self.sr.uuid, 

594 self.uuid, failfast): 

595 raise util.SMException("Failed to pause VDI %s" % self) 

596 

597 def _report_tapdisk_unpause_error(self): 

598 try: 

599 xapi = self.sr.xapi.session.xenapi 

600 sr_ref = xapi.SR.get_by_uuid(self.sr.uuid) 

601 msg_name = "failed to unpause tapdisk" 

602 msg_body = "Failed to unpause tapdisk for VDI %s, " \ 

603 "VMs using this tapdisk have lost access " \ 

604 "to the corresponding disk(s)" % self.uuid 

605 xapi.message.create(msg_name, "4", "SR", self.sr.uuid, msg_body) 

606 except Exception as e: 

607 util.SMlog("failed to generate message: %s" % e) 

608 

609 def unpause(self): 

610 if not blktap2.VDI.tap_unpause(self.sr.xapi.session, self.sr.uuid, 

611 self.uuid): 

612 self._report_tapdisk_unpause_error() 

613 raise util.SMException("Failed to unpause VDI %s" % self) 

614 

615 def refresh(self, ignoreNonexistent=True): 

616 """Pause-unpause in one step""" 

617 self.sr.lock() 

618 try: 

619 try: 

620 if not blktap2.VDI.tap_refresh(self.sr.xapi.session, 

621 self.sr.uuid, self.uuid): 

622 self._report_tapdisk_unpause_error() 

623 raise util.SMException("Failed to refresh %s" % self) 

624 except XenAPI.Failure as e: 

625 if util.isInvalidVDI(e) and ignoreNonexistent: 

626 Util.log("VDI %s not found, ignoring" % self) 

627 return 

628 raise 

629 finally: 

630 self.sr.unlock() 

631 

632 def isSnapshot(self): 

633 return self.sr.xapi.isSnapshot(self) 

634 

635 def isAttachedRW(self): 

636 return util.is_attached_rw( 

637 self.sr.xapi.session.xenapi.VDI.get_sm_config(self.getRef())) 

638 

639 def getVHDBlocks(self): 

640 val = self.updateBlockInfo() 

641 bitmap = zlib.decompress(base64.b64decode(val)) 

642 return bitmap 
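
# Note: the value cached in sm-config by updateBlockInfo() is assumed to be
# the zlib-compressed block bitmap returned by _queryVHDBlocks(), base64
# encoded; hence the decode + decompress round-trip here.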

643 

644 def isCoalesceable(self): 

645 """A VDI is coalesceable if it has no siblings and is not a leaf""" 

646 return not self.scanError and \ 

647 self.parent and \ 

648 len(self.parent.children) == 1 and \ 

649 self.hidden and \ 

650 len(self.children) > 0 

651 

652 def isLeafCoalesceable(self): 

653 """A VDI is leaf-coalesceable if it has no siblings and is a leaf""" 

654 return not self.scanError and \ 

655 self.parent and \ 

656 len(self.parent.children) == 1 and \ 

657 not self.hidden and \ 

658 len(self.children) == 0 

659 

660 def canLiveCoalesce(self, speed): 

661 """Can we stop-and-leaf-coalesce this VDI? The VDI must be 

662 isLeafCoalesceable() already""" 

663 feasibleSize = False 

664 allowedDownTime = \ 

665 self.TIMEOUT_SAFETY_MARGIN * self.LIVE_LEAF_COALESCE_TIMEOUT 

666 vhd_size = self.getAllocatedSize() 

667 if speed: 

668 feasibleSize = \ 

669 vhd_size // speed < allowedDownTime 

670 else: 

671 feasibleSize = \ 

672 vhd_size < self.LIVE_LEAF_COALESCE_MAX_SIZE 

673 

674 return (feasibleSize or 

675 self.getConfig(self.DB_LEAFCLSC) == self.LEAFCLSC_FORCE) 
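
# Worked example with the class defaults: allowedDownTime is 0.5 * 10 = 5
# seconds, so a VDI with 100 MiB of allocated data qualifies only if the
# measured coalesce speed exceeds roughly 20 MiB/s; with no speed history the
# 20 MiB LIVE_LEAF_COALESCE_MAX_SIZE threshold applies instead.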

676 

677 def getAllPrunable(self): 

678 if len(self.children) == 0: # base case 

679 # it is possible to have a hidden leaf that was recently coalesced 

680 # onto its parent, its children already relinked but not yet 

681 # reloaded - in which case it may not be garbage collected yet: 

682 # some tapdisks could still be using the file. 

683 if self.sr.journaler.get(self.JRN_RELINK, self.uuid): 

684 return [] 

685 if not self.scanError and self.hidden: 

686 return [self] 

687 return [] 

688 

689 thisPrunable = True 

690 vdiList = [] 

691 for child in self.children: 

692 childList = child.getAllPrunable() 

693 vdiList.extend(childList) 

694 if child not in childList: 

695 thisPrunable = False 

696 

697 # We can destroy the current VDI if all children are hidden BUT the 

698 # current VDI must be hidden too to do that! 

699 # Example in this case (after a failed live leaf coalesce): 

700 # 

701 # SMGC: [32436] SR 07ed ('linstor-nvme-sr') (2 VDIs in 1 VHD trees): 

702 # SMGC: [32436] b5458d61(1.000G/4.127M) 

703 # SMGC: [32436] *OLD_b545(1.000G/4.129M) 

704 # 

705 # OLD_b545 is hidden and must be removed, but b5458d61 is not. 

706 # Normally, when the delete action is executed, we are not in this function 

707 # but in `_liveLeafCoalesce`. 

708 

709 if not self.scanError and self.hidden and thisPrunable: 

710 vdiList.append(self) 

711 return vdiList 

712 

713 def getSizeVHD(self): 

714 return self._sizeVHD 

715 

716 def getAllocatedSize(self): 

717 return self._sizeAllocated 

718 

719 def getTreeRoot(self): 

720 "Get the root of the tree that self belongs to" 

721 root = self 

722 while root.parent: 

723 root = root.parent 

724 return root 

725 

726 def getTreeHeight(self): 

727 "Get the height of the subtree rooted at self" 

728 if len(self.children) == 0: 

729 return 1 

730 

731 maxChildHeight = 0 

732 for child in self.children: 

733 childHeight = child.getTreeHeight() 

734 if childHeight > maxChildHeight: 

735 maxChildHeight = childHeight 

736 

737 return maxChildHeight + 1 

738 

739 def getAllLeaves(self): 

740 "Get all leaf nodes in the subtree rooted at self" 

741 if len(self.children) == 0: 

742 return [self] 

743 

744 leaves = [] 

745 for child in self.children: 

746 leaves.extend(child.getAllLeaves()) 

747 return leaves 

748 

749 def updateBlockInfo(self): 

750 val = base64.b64encode(self._queryVHDBlocks()).decode() 

751 self.setConfig(VDI.DB_VHD_BLOCKS, val) 

752 return val 

753 

754 def rename(self, uuid): 

755 "Rename the VDI file" 

756 assert(not self.sr.vdis.get(uuid)) 

757 self._clearRef() 

758 oldUuid = self.uuid 

759 self.uuid = uuid 

760 self.children = [] 

761 # updating the children themselves is the responsibility of the caller 

762 del self.sr.vdis[oldUuid] 

763 self.sr.vdis[self.uuid] = self 

764 

765 def delete(self): 

766 "Physically delete the VDI" 

767 lock.Lock.cleanup(self.uuid, lvhdutil.NS_PREFIX_LVM + self.sr.uuid) 

768 lock.Lock.cleanupAll(self.uuid) 

769 self._clear() 

770 

771 def getParent(self): 

772 return vhdutil.getParent(self.path, lambda x: x.strip()) 

773 

774 def repair(self, parent): 

775 vhdutil.repair(parent) 

776 

777 def __str__(self): 

778 strHidden = "" 

779 if self.hidden: 

780 strHidden = "*" 

781 strSizeVirt = "?" 

782 if self.sizeVirt > 0: 

783 strSizeVirt = Util.num2str(self.sizeVirt) 

784 strSizeVHD = "?" 

785 if self._sizeVHD > 0: 

786 strSizeVHD = "/%s" % Util.num2str(self._sizeVHD) 

787 strSizeAllocated = "?" 

788 if self._sizeAllocated >= 0: 

789 strSizeAllocated = "/%s" % Util.num2str(self._sizeAllocated) 

790 strType = "" 

791 if self.raw: 

792 strType = "[RAW]" 

793 strSizeVHD = "" 

794 

795 return "%s%s(%s%s%s)%s" % (strHidden, self.uuid[0:8], strSizeVirt, 

796 strSizeVHD, strSizeAllocated, strType) 

797 

798 def validate(self, fast=False): 

799 if not vhdutil.check(self.path, fast=fast): 

800 raise util.SMException("VHD %s corrupted" % self) 

801 

802 def _clear(self): 

803 self.uuid = "" 

804 self.path = "" 

805 self.parentUuid = "" 

806 self.parent = None 

807 self._clearRef() 

808 

809 def _clearRef(self): 

810 self._vdiRef = None 

811 

812 def _doCoalesce(self): 

813 """Coalesce self onto parent. Only perform the actual coalescing of 

814 VHD, but not the subsequent relinking. We'll do that as the next step, 

815 after reloading the entire SR in case things have changed while we 

816 were coalescing""" 

817 self.validate() 

818 self.parent.validate(True) 

819 self.parent._increaseSizeVirt(self.sizeVirt) 

820 self.sr._updateSlavesOnResize(self.parent) 

821 self._coalesceVHD(0) 

822 self.parent.validate(True) 

823 #self._verifyContents(0) 

824 self.parent.updateBlockInfo() 

825 

826 def _verifyContents(self, timeOut): 

827 Util.log(" Coalesce verification on %s" % self) 

828 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) 

829 Util.runAbortable(lambda: self._runTapdiskDiff(), True, 

830 self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut) 

831 Util.log(" Coalesce verification succeeded") 

832 

833 def _runTapdiskDiff(self): 

834 cmd = "tapdisk-diff -n %s:%s -m %s:%s" % \ 

835 (self.getDriverName(), self.path, \ 

836 self.parent.getDriverName(), self.parent.path) 

837 Util.doexec(cmd, 0) 

838 return True 

839 

840 def _reportCoalesceError(vdi, ce): 

841 """Reports a coalesce error to XenCenter. 

842 

843 vdi: the VDI object on which the coalesce error occurred 

844 ce: the CommandException that was raised""" 

845 

846 msg_name = os.strerror(ce.code) 

847 if ce.code == errno.ENOSPC: 

848 # TODO We could add more information here, e.g. exactly how much 

849 # space is required for the particular coalesce, as well as actions 

850 # to be taken by the user and consequences of not taking these 

851 # actions. 

852 msg_body = 'Ran out of space while coalescing.' 

853 elif ce.code == errno.EIO: 

854 msg_body = 'I/O error while coalescing.' 

855 else: 

856 msg_body = '' 

857 util.SMlog('Coalesce failed on SR %s: %s (%s)' 

858 % (vdi.sr.uuid, msg_name, msg_body)) 

859 

860 # Create a XenCenter message, but don't spam. 

861 xapi = vdi.sr.xapi.session.xenapi 

862 sr_ref = xapi.SR.get_by_uuid(vdi.sr.uuid) 

863 oth_cfg = xapi.SR.get_other_config(sr_ref) 

864 if COALESCE_ERR_RATE_TAG in oth_cfg: 

865 coalesce_err_rate = float(oth_cfg[COALESCE_ERR_RATE_TAG]) 

866 else: 

867 coalesce_err_rate = DEFAULT_COALESCE_ERR_RATE 

868 

869 xcmsg = False 

870 if coalesce_err_rate == 0: 

871 xcmsg = True 

872 elif coalesce_err_rate > 0: 

873 now = datetime.datetime.now() 

874 sm_cfg = xapi.SR.get_sm_config(sr_ref) 

875 if COALESCE_LAST_ERR_TAG in sm_cfg: 

876 # seconds per message (minimum distance in time between two 

877 # messages in seconds) 

878 spm = datetime.timedelta(seconds=(1.0 / coalesce_err_rate) * 60) 

879 last = datetime.datetime.fromtimestamp( 

880 float(sm_cfg[COALESCE_LAST_ERR_TAG])) 

881 if now - last >= spm: 

882 xapi.SR.remove_from_sm_config(sr_ref, 

883 COALESCE_LAST_ERR_TAG) 

884 xcmsg = True 

885 else: 

886 xcmsg = True 

887 if xcmsg: 

888 xapi.SR.add_to_sm_config(sr_ref, COALESCE_LAST_ERR_TAG, 

889 str(now.strftime('%s'))) 

890 if xcmsg: 

891 xapi.message.create(msg_name, "3", "SR", vdi.sr.uuid, msg_body) 

892 _reportCoalesceError = staticmethod(_reportCoalesceError) 

893 

894 def coalesce(self): 

895 # size is returned in sectors 

896 return vhdutil.coalesce(self.path) * 512 

897 

898 def _doCoalesceVHD(vdi): 

899 try: 

900 startTime = time.time() 

901 vhdSize = vdi.getAllocatedSize() 

902 coalesced_size = vdi.coalesce() 

903 endTime = time.time() 

904 vdi.sr.recordStorageSpeed(startTime, endTime, coalesced_size) 

905 except util.CommandException as ce: 

906 # We use try/except for the following piece of code because it runs 

907 # in a separate process context and errors will not be caught and 

908 # reported by anyone. 

909 try: 

910 # Report coalesce errors back to user via XC 

911 VDI._reportCoalesceError(vdi, ce) 

912 except Exception as e: 

913 util.SMlog('failed to create XenCenter message: %s' % e) 

914 raise ce 

915 except: 

916 raise 

917 _doCoalesceVHD = staticmethod(_doCoalesceVHD) 

918 

919 def _vdi_is_raw(self, vdi_path): 

920 """ 

921 Given path to vdi determine if it is raw 

922 """ 

923 uuid = self.extractUuid(vdi_path) 

924 return self.sr.vdis[uuid].raw 

925 

926 def _coalesceVHD(self, timeOut): 

927 Util.log(" Running VHD coalesce on %s" % self) 

928 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) 

929 try: 

930 util.fistpoint.activate_custom_fn( 

931 "cleanup_coalesceVHD_inject_failure", 

932 util.inject_failure) 

933 Util.runAbortable(lambda: VDI._doCoalesceVHD(self), None, 

934 self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut) 

935 except: 

936 # An exception at this phase could indicate a failure in VHD coalesce, 

937 # or that the coalesce was killed by runAbortable due to timeOut. 

938 # Try a repair and reraise the exception. 

939 parent = "" 

940 try: 

941 parent = self.getParent() 

942 if not self._vdi_is_raw(parent): 

943 # Repair error is logged and ignored. Error reraised later 

944 util.SMlog('Coalesce failed on %s, attempting repair on ' \ 

945 'parent %s' % (self.uuid, parent)) 

946 self.repair(parent) 

947 except Exception as e: 

948 util.SMlog('(error ignored) Failed to repair parent %s ' \ 

949 'after failed coalesce on %s, err: %s' % 

950 (parent, self.path, e)) 

951 raise 

952 

953 util.fistpoint.activate("LVHDRT_coalescing_VHD_data", self.sr.uuid) 

954 

955 def _relinkSkip(self): 

956 """Relink children of this VDI to point to the parent of this VDI""" 

957 abortFlag = IPCFlag(self.sr.uuid) 

958 for child in self.children: 

959 if abortFlag.test(FLAG_TYPE_ABORT): 

960 raise AbortException("Aborting due to signal") 

961 Util.log(" Relinking %s from %s to %s" % \ 

962 (child, self, self.parent)) 

963 util.fistpoint.activate("LVHDRT_relinking_grandchildren", self.sr.uuid) 

964 child._setParent(self.parent) 

965 self.children = [] 

966 

967 def _reloadChildren(self, vdiSkip): 

968 """Pause & unpause all VDIs in the subtree to cause blktap to reload 

969 the VHD metadata for this file in any online VDI""" 

970 abortFlag = IPCFlag(self.sr.uuid) 

971 for child in self.children: 

972 if child == vdiSkip: 

973 continue 

974 if abortFlag.test(FLAG_TYPE_ABORT): 

975 raise AbortException("Aborting due to signal") 

976 Util.log(" Reloading VDI %s" % child) 

977 child._reload() 

978 

979 def _reload(self): 

980 """Pause & unpause to cause blktap to reload the VHD metadata""" 

981 for child in self.children: 

982 child._reload() 

983 

984 # only leaves can be attached 

985 if len(self.children) == 0: 

986 try: 

987 self.delConfig(VDI.DB_VDI_RELINKING) 

988 except XenAPI.Failure as e: 

989 if not util.isInvalidVDI(e): 

990 raise 

991 self.refresh() 

992 

993 def _tagChildrenForRelink(self): 

994 if len(self.children) == 0: 

995 retries = 0 

996 try: 

997 while retries < 15: 

998 retries += 1 

999 if self.getConfig(VDI.DB_VDI_ACTIVATING) is not None: 

1000 Util.log("VDI %s is activating, wait to relink" % 

1001 self.uuid) 

1002 else: 

1003 self.setConfig(VDI.DB_VDI_RELINKING, "True") 

1004 

1005 if self.getConfig(VDI.DB_VDI_ACTIVATING): 

1006 self.delConfig(VDI.DB_VDI_RELINKING) 

1007 Util.log("VDI %s started activating while tagging" % 

1008 self.uuid) 

1009 else: 

1010 return 

1011 time.sleep(2) 

1012 

1013 raise util.SMException("Failed to tag vdi %s for relink" % self) 

1014 except XenAPI.Failure as e: 

1015 if not util.isInvalidVDI(e): 

1016 raise 

1017 

1018 for child in self.children: 

1019 child._tagChildrenForRelink() 
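
# The leaf handshake above closes a race with tapdisk activation: the
# "relinking" flag is set only while the VDI is not marked "activating", and
# is rolled back if activation started in the meantime, retrying for up to
# roughly 30 seconds (15 attempts, 2-second sleeps) before giving up.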

1020 

1021 def _loadInfoParent(self): 

1022 ret = vhdutil.getParent(self.path, lvhdutil.extractUuid) 

1023 if ret: 

1024 self.parentUuid = ret 

1025 

1026 def _setParent(self, parent): 

1027 vhdutil.setParent(self.path, parent.path, False) 

1028 self.parent = parent 

1029 self.parentUuid = parent.uuid 

1030 parent.children.append(self) 

1031 try: 

1032 self.setConfig(self.DB_VHD_PARENT, self.parentUuid) 

1033 Util.log("Updated the vhd-parent field for child %s with %s" % \ 

1034 (self.uuid, self.parentUuid)) 

1035 except: 

1036 Util.log("Failed to update %s with vhd-parent field %s" % \ 

1037 (self.uuid, self.parentUuid)) 

1038 

1039 def _loadInfoHidden(self): 

1040 hidden = vhdutil.getHidden(self.path) 

1041 self.hidden = (hidden != 0) 

1042 

1043 def _setHidden(self, hidden=True): 

1044 vhdutil.setHidden(self.path, hidden) 

1045 self.hidden = hidden 

1046 

1047 def _increaseSizeVirt(self, size, atomic=True): 

1048 """ensure the virtual size of 'self' is at least 'size'. Note that 

1049 resizing a VHD must always be offline and atomically: the file must 

1050 not be open by anyone and no concurrent operations may take place. 

1051 Thus we use the Agent API call for performing paused atomic 

1052 operations. If the caller is already in the atomic context, it must 

1053 call with atomic = False""" 

1054 if self.sizeVirt >= size: 

1055 return 

1056 Util.log(" Expanding VHD virt size for VDI %s: %s -> %s" % \ 

1057 (self, Util.num2str(self.sizeVirt), Util.num2str(size))) 

1058 

1059 msize = vhdutil.getMaxResizeSize(self.path) * 1024 * 1024 

1060 if (size <= msize): 

1061 vhdutil.setSizeVirtFast(self.path, size) 

1062 else: 

1063 if atomic: 

1064 vdiList = self._getAllSubtree() 

1065 self.sr.lock() 

1066 try: 

1067 self.sr.pauseVDIs(vdiList) 

1068 try: 

1069 self._setSizeVirt(size) 

1070 finally: 

1071 self.sr.unpauseVDIs(vdiList) 

1072 finally: 

1073 self.sr.unlock() 

1074 else: 

1075 self._setSizeVirt(size) 

1076 

1077 self.sizeVirt = vhdutil.getSizeVirt(self.path) 

1078 

1079 def _setSizeVirt(self, size): 

1080 """WARNING: do not call this method directly unless all VDIs in the 

1081 subtree are guaranteed to be unplugged (and remain so for the duration 

1082 of the operation): this operation is only safe for offline VHDs""" 

1083 jFile = os.path.join(self.sr.path, self.uuid) 

1084 vhdutil.setSizeVirt(self.path, size, jFile) 

1085 

1086 def _queryVHDBlocks(self): 

1087 return vhdutil.getBlockBitmap(self.path) 

1088 

1089 def _getCoalescedSizeData(self): 

1090 """Get the data size of the resulting VHD if we coalesce self onto 

1091 parent. We calculate the actual size by using the VHD block allocation 

1092 information (as opposed to just adding up the two VHD sizes to get an 

1093 upper bound)""" 

1094 # make sure we don't use stale BAT info from vdi_rec since the child 

1095 # was writable all this time 

1096 self.delConfig(VDI.DB_VHD_BLOCKS) 

1097 blocksChild = self.getVHDBlocks() 

1098 blocksParent = self.parent.getVHDBlocks() 

1099 numBlocks = Util.countBits(blocksChild, blocksParent) 

1100 Util.log("Num combined blocks = %d" % numBlocks) 

1101 sizeData = numBlocks * vhdutil.VHD_BLOCK_SIZE 

1102 assert(sizeData <= self.sizeVirt) 

1103 return sizeData 

1104 

1105 def _calcExtraSpaceForCoalescing(self): 

1106 sizeData = self._getCoalescedSizeData() 

1107 sizeCoalesced = sizeData + vhdutil.calcOverheadBitmap(sizeData) + \ 

1108 vhdutil.calcOverheadEmpty(self.sizeVirt) 

1109 Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced)) 

1110 return sizeCoalesced - self.parent.getSizeVHD() 
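
# Illustrative estimate: if the combined bitmaps report 1000 allocated VHD
# blocks of vhdutil.VHD_BLOCK_SIZE (2 MiB) each, sizeData is ~2000 MiB; adding
# the bitmap and empty-VHD overheads and subtracting the parent's current
# physical size gives the extra space the coalesce is expected to need.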

1111 

1112 def _calcExtraSpaceForLeafCoalescing(self): 

1113 """How much extra space in the SR will be required to 

1114 [live-]leaf-coalesce this VDI""" 

1115 # the space requirements are the same as for inline coalesce 

1116 return self._calcExtraSpaceForCoalescing() 

1117 

1118 def _calcExtraSpaceForSnapshotCoalescing(self): 

1119 """How much extra space in the SR will be required to 

1120 snapshot-coalesce this VDI""" 

1121 return self._calcExtraSpaceForCoalescing() + \ 

1122 vhdutil.calcOverheadEmpty(self.sizeVirt) # extra snap leaf 

1123 

1124 def _getAllSubtree(self): 

1125 """Get self and all VDIs in the subtree of self as a flat list""" 

1126 vdiList = [self] 

1127 for child in self.children: 

1128 vdiList.extend(child._getAllSubtree()) 

1129 return vdiList 

1130 

1131 

1132class FileVDI(VDI): 

1133 """Object representing a VDI in a file-based SR (EXT or NFS)""" 

1134 

1135 @staticmethod 

1136 def extractUuid(path): 

1137 path = os.path.basename(path.strip()) 

1138 if not (path.endswith(vhdutil.FILE_EXTN_VHD) or \ 

1139 path.endswith(vhdutil.FILE_EXTN_RAW)): 

1140 return None 

1141 uuid = path.replace(vhdutil.FILE_EXTN_VHD, "").replace( \ 

1142 vhdutil.FILE_EXTN_RAW, "") 

1143 # TODO: validate UUID format 

1144 return uuid 

1145 

1146 def __init__(self, sr, uuid, raw): 

1147 VDI.__init__(self, sr, uuid, raw) 

1148 if self.raw: 

1149 self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_RAW) 

1150 else: 

1151 self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_VHD) 

1152 

1153 def load(self, info=None): 

1154 if not info: 

1155 if not util.pathexists(self.path): 

1156 raise util.SMException("%s not found" % self.path) 

1157 try: 

1158 info = vhdutil.getVHDInfo(self.path, self.extractUuid) 

1159 except util.SMException: 

1160 Util.log(" [VDI %s: failed to read VHD metadata]" % self.uuid) 

1161 return 

1162 self.parent = None 

1163 self.children = [] 

1164 self.parentUuid = info.parentUuid 

1165 self.sizeVirt = info.sizeVirt 

1166 self._sizeVHD = info.sizePhys 

1167 self._sizeAllocated = info.sizeAllocated 

1168 self.hidden = info.hidden 

1169 self.scanError = False 

1170 self.path = os.path.join(self.sr.path, "%s%s" % \ 

1171 (self.uuid, vhdutil.FILE_EXTN_VHD)) 

1172 

1173 def rename(self, uuid): 

1174 oldPath = self.path 

1175 VDI.rename(self, uuid) 

1176 self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_VHD) 

1177 self.path = os.path.join(self.sr.path, self.fileName) 

1178 assert(not util.pathexists(self.path)) 

1179 Util.log("Renaming %s -> %s" % (oldPath, self.path)) 

1180 os.rename(oldPath, self.path) 

1181 

1182 def delete(self): 

1183 if len(self.children) > 0: 

1184 raise util.SMException("VDI %s has children, can't delete" % \ 

1185 self.uuid) 

1186 try: 

1187 self.sr.lock() 

1188 try: 

1189 os.unlink(self.path) 

1190 self.sr.forgetVDI(self.uuid) 

1191 finally: 

1192 self.sr.unlock() 

1193 except OSError: 

1194 raise util.SMException("os.unlink(%s) failed" % self.path) 

1195 VDI.delete(self) 

1196 

1197 

1198class LVHDVDI(VDI): 

1199 """Object representing a VDI in an LVHD SR""" 

1200 

1201 JRN_ZERO = "zero" # journal entry type for zeroing out end of parent 

1202 DRIVER_NAME_RAW = "aio" 

1203 

1204 def load(self, vdiInfo): 

1205 self.parent = None 

1206 self.children = [] 

1207 self._sizeVHD = -1 

1208 self._sizeAllocated = -1 

1209 self.scanError = vdiInfo.scanError 

1210 self.sizeLV = vdiInfo.sizeLV 

1211 self.sizeVirt = vdiInfo.sizeVirt 

1212 self.fileName = vdiInfo.lvName 

1213 self.lvActive = vdiInfo.lvActive 

1214 self.lvOpen = vdiInfo.lvOpen 

1215 self.lvReadonly = vdiInfo.lvReadonly 

1216 self.hidden = vdiInfo.hidden 

1217 self.parentUuid = vdiInfo.parentUuid 

1218 self.path = os.path.join(self.sr.path, self.fileName) 

1219 

1220 @staticmethod 

1221 def extractUuid(path): 

1222 return lvhdutil.extractUuid(path) 

1223 

1224 def getDriverName(self): 

1225 if self.raw: 

1226 return self.DRIVER_NAME_RAW 

1227 return self.DRIVER_NAME_VHD 

1228 

1229 def inflate(self, size): 

1230 """inflate the LV containing the VHD to 'size'""" 

1231 if self.raw: 

1232 return 

1233 self._activate() 

1234 self.sr.lock() 

1235 try: 

1236 lvhdutil.inflate(self.sr.journaler, self.sr.uuid, self.uuid, size) 

1237 util.fistpoint.activate("LVHDRT_inflating_the_parent", self.sr.uuid) 

1238 finally: 

1239 self.sr.unlock() 

1240 self.sizeLV = self.sr.lvmCache.getSize(self.fileName) 

1241 self._sizeVHD = -1 

1242 self._sizeAllocated = -1 

1243 

1244 def deflate(self): 

1245 """deflate the LV containing the VHD to minimum""" 

1246 if self.raw: 

1247 return 

1248 self._activate() 

1249 self.sr.lock() 

1250 try: 

1251 lvhdutil.deflate(self.sr.lvmCache, self.fileName, self.getSizeVHD()) 

1252 finally: 

1253 self.sr.unlock() 

1254 self.sizeLV = self.sr.lvmCache.getSize(self.fileName) 

1255 self._sizeVHD = -1 

1256 self._sizeAllocated = -1 

1257 

1258 def inflateFully(self): 

1259 self.inflate(lvhdutil.calcSizeVHDLV(self.sizeVirt)) 

1260 

1261 def inflateParentForCoalesce(self): 

1262 """Inflate the parent only as much as needed for the purposes of 

1263 coalescing""" 

1264 if self.parent.raw: 

1265 return 

1266 inc = self._calcExtraSpaceForCoalescing() 

1267 if inc > 0: 

1268 util.fistpoint.activate("LVHDRT_coalescing_before_inflate_grandparent", self.sr.uuid) 

1269 self.parent.inflate(self.parent.sizeLV + inc) 

1270 

1271 def updateBlockInfo(self): 

1272 if not self.raw: 

1273 return VDI.updateBlockInfo(self) 

1274 

1275 def rename(self, uuid): 

1276 oldUuid = self.uuid 

1277 oldLVName = self.fileName 

1278 VDI.rename(self, uuid) 

1279 self.fileName = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + self.uuid 

1280 if self.raw: 

1281 self.fileName = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_RAW] + self.uuid 

1282 self.path = os.path.join(self.sr.path, self.fileName) 

1283 assert(not self.sr.lvmCache.checkLV(self.fileName)) 

1284 

1285 self.sr.lvmCache.rename(oldLVName, self.fileName) 

1286 if self.sr.lvActivator.get(oldUuid, False): 

1287 self.sr.lvActivator.replace(oldUuid, self.uuid, self.fileName, False) 

1288 

1289 ns = lvhdutil.NS_PREFIX_LVM + self.sr.uuid 

1290 (cnt, bcnt) = RefCounter.check(oldUuid, ns) 

1291 RefCounter.set(self.uuid, cnt, bcnt, ns) 

1292 RefCounter.reset(oldUuid, ns) 

1293 

1294 def delete(self): 

1295 if len(self.children) > 0: 

1296 raise util.SMException("VDI %s has children, can't delete" % \ 

1297 self.uuid) 

1298 self.sr.lock() 

1299 try: 

1300 self.sr.lvmCache.remove(self.fileName) 

1301 self.sr.forgetVDI(self.uuid) 

1302 finally: 

1303 self.sr.unlock() 

1304 RefCounter.reset(self.uuid, lvhdutil.NS_PREFIX_LVM + self.sr.uuid) 

1305 VDI.delete(self) 

1306 

1307 def getSizeVHD(self): 

1308 if self._sizeVHD == -1: 

1309 self._loadInfoSizeVHD() 

1310 return self._sizeVHD 

1311 

1312 def _loadInfoSizeVHD(self): 

1313 """Get the physical utilization of the VHD file. We do it individually 

1314 (and not using the VHD batch scanner) as an optimization: this info is 

1315 relatively expensive and we need it only for VDIs involved in 

1316 coalescing.""" 

1317 if self.raw: 

1318 return 

1319 self._activate() 

1320 self._sizeVHD = vhdutil.getSizePhys(self.path) 

1321 if self._sizeVHD <= 0: 

1322 raise util.SMException("phys size of %s = %d" % \ 

1323 (self, self._sizeVHD)) 

1324 

1325 def getAllocatedSize(self): 

1326 if self._sizeAllocated == -1: 

1327 self._loadInfoSizeAllocated() 

1328 return self._sizeAllocated 

1329 

1330 def _loadInfoSizeAllocated(self): 

1331 """ 

1332 Get the allocated size of the VHD volume. 

1333 """ 

1334 if self.raw: 

1335 return 

1336 self._activate() 

1337 self._sizeAllocated = vhdutil.getAllocatedSize(self.path) 

1338 

1339 def _loadInfoHidden(self): 

1340 if self.raw: 

1341 self.hidden = self.sr.lvmCache.getHidden(self.fileName) 

1342 else: 

1343 VDI._loadInfoHidden(self) 

1344 

1345 def _setHidden(self, hidden=True): 

1346 if self.raw: 

1347 self.sr.lvmCache.setHidden(self.fileName, hidden) 

1348 self.hidden = hidden 

1349 else: 

1350 VDI._setHidden(self, hidden) 

1351 

1352 def __str__(self): 

1353 strType = "VHD" 

1354 if self.raw: 

1355 strType = "RAW" 

1356 strHidden = "" 

1357 if self.hidden: 

1358 strHidden = "*" 

1359 strSizeVHD = "" 

1360 if self._sizeVHD > 0: 

1361 strSizeVHD = Util.num2str(self._sizeVHD) 

1362 strSizeAllocated = "" 

1363 if self._sizeAllocated >= 0: 

1364 strSizeAllocated = Util.num2str(self._sizeAllocated) 

1365 strActive = "n" 

1366 if self.lvActive: 

1367 strActive = "a" 

1368 if self.lvOpen: 

1369 strActive += "o" 

1370 return "%s%s[%s](%s/%s/%s/%s|%s)" % (strHidden, self.uuid[0:8], strType, 

1371 Util.num2str(self.sizeVirt), strSizeVHD, strSizeAllocated, 

1372 Util.num2str(self.sizeLV), strActive) 

1373 

1374 def validate(self, fast=False): 

1375 if not self.raw: 

1376 VDI.validate(self, fast) 

1377 

1378 def _doCoalesce(self): 

1379 """LVHD parents must first be activated, inflated, and made writable""" 

1380 try: 

1381 self._activateChain() 

1382 self.sr.lvmCache.setReadonly(self.parent.fileName, False) 

1383 self.parent.validate() 

1384 self.inflateParentForCoalesce() 

1385 VDI._doCoalesce(self) 

1386 finally: 

1387 self.parent._loadInfoSizeVHD() 

1388 self.parent.deflate() 

1389 self.sr.lvmCache.setReadonly(self.parent.fileName, True) 

1390 

1391 def _setParent(self, parent): 

1392 self._activate() 

1393 if self.lvReadonly: 

1394 self.sr.lvmCache.setReadonly(self.fileName, False) 

1395 

1396 try: 

1397 vhdutil.setParent(self.path, parent.path, parent.raw) 

1398 finally: 

1399 if self.lvReadonly: 

1400 self.sr.lvmCache.setReadonly(self.fileName, True) 

1401 self._deactivate() 

1402 self.parent = parent 

1403 self.parentUuid = parent.uuid 

1404 parent.children.append(self) 

1405 try: 

1406 self.setConfig(self.DB_VHD_PARENT, self.parentUuid) 

1407 Util.log("Updated the vhd-parent field for child %s with %s" % \ 

1408 (self.uuid, self.parentUuid)) 

1409 except: 

1410 Util.log("Failed to update the vhd-parent with %s for child %s" % \ 

1411 (self.parentUuid, self.uuid)) 

1412 

1413 def _activate(self): 

1414 self.sr.lvActivator.activate(self.uuid, self.fileName, False) 

1415 

1416 def _activateChain(self): 

1417 vdi = self 

1418 while vdi: 

1419 vdi._activate() 

1420 vdi = vdi.parent 

1421 

1422 def _deactivate(self): 

1423 self.sr.lvActivator.deactivate(self.uuid, False) 

1424 

1425 def _increaseSizeVirt(self, size, atomic=True): 

1426 "ensure the virtual size of 'self' is at least 'size'" 

1427 self._activate() 

1428 if not self.raw: 

1429 VDI._increaseSizeVirt(self, size, atomic) 

1430 return 

1431 

1432 # raw VDI case 

1433 offset = self.sizeLV 

1434 if self.sizeVirt < size: 

1435 oldSize = self.sizeLV 

1436 self.sizeLV = util.roundup(lvutil.LVM_SIZE_INCREMENT, size) 

1437 Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.sizeLV)) 

1438 self.sr.lvmCache.setSize(self.fileName, self.sizeLV) 

1439 offset = oldSize 

1440 unfinishedZero = False 

1441 jval = self.sr.journaler.get(self.JRN_ZERO, self.uuid) 

1442 if jval: 

1443 unfinishedZero = True 

1444 offset = int(jval) 

1445 length = self.sizeLV - offset 

1446 if not length: 

1447 return 

1448 

1449 if unfinishedZero: 

1450 Util.log(" ==> Redoing unfinished zeroing out") 

1451 else: 

1452 self.sr.journaler.create(self.JRN_ZERO, self.uuid, \ 

1453 str(offset)) 

1454 Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length)) 

1455 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) 

1456 func = lambda: util.zeroOut(self.path, offset, length) 

1457 Util.runAbortable(func, True, self.sr.uuid, abortTest, 

1458 VDI.POLL_INTERVAL, 0) 

1459 self.sr.journaler.remove(self.JRN_ZERO, self.uuid) 

1460 

1461 def _setSizeVirt(self, size): 

1462 """WARNING: do not call this method directly unless all VDIs in the 

1463 subtree are guaranteed to be unplugged (and remain so for the duration 

1464 of the operation): this operation is only safe for offline VHDs""" 

1465 self._activate() 

1466 jFile = lvhdutil.createVHDJournalLV(self.sr.lvmCache, self.uuid, 

1467 vhdutil.MAX_VHD_JOURNAL_SIZE) 

1468 try: 

1469 lvhdutil.setSizeVirt(self.sr.journaler, self.sr.uuid, self.uuid, 

1470 size, jFile) 

1471 finally: 

1472 lvhdutil.deleteVHDJournalLV(self.sr.lvmCache, self.uuid) 

1473 

1474 def _queryVHDBlocks(self): 

1475 self._activate() 

1476 return VDI._queryVHDBlocks(self) 

1477 

1478 def _calcExtraSpaceForCoalescing(self): 

1479 if self.parent.raw: 

1480 return 0 # raw parents are never deflated in the first place 

1481 sizeCoalesced = lvhdutil.calcSizeVHDLV(self._getCoalescedSizeData()) 

1482 Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced)) 

1483 return sizeCoalesced - self.parent.sizeLV 

1484 

1485 def _calcExtraSpaceForLeafCoalescing(self): 

1486 """How much extra space in the SR will be required to 

1487 [live-]leaf-coalesce this VDI""" 

1488 # we can deflate the leaf to minimize the space requirements 

1489 deflateDiff = self.sizeLV - lvhdutil.calcSizeLV(self.getSizeVHD()) 

1490 return self._calcExtraSpaceForCoalescing() - deflateDiff 

1491 

1492 def _calcExtraSpaceForSnapshotCoalescing(self): 

1493 return self._calcExtraSpaceForCoalescing() + \ 

1494 lvhdutil.calcSizeLV(self.getSizeVHD()) 

1495 

1496 

1497class LinstorVDI(VDI): 

1498 """Object representing a VDI in a LINSTOR SR""" 

1499 

1500 VOLUME_LOCK_TIMEOUT = 30 

1501 

1502 def load(self, info=None): 

1503 self.parentUuid = info.parentUuid 

1504 self.scanError = True 

1505 self.parent = None 

1506 self.children = [] 

1507 

1508 self.fileName = self.sr._linstor.get_volume_name(self.uuid) 

1509 self.path = self.sr._linstor.build_device_path(self.fileName) 

1510 

1511 if not info: 

1512 try: 

1513 info = self.sr._vhdutil.get_vhd_info(self.uuid) 

1514 except util.SMException: 

1515 Util.log( 

1516 ' [VDI {}: failed to read VHD metadata]'.format(self.uuid) 

1517 ) 

1518 return 

1519 

1520 self.parentUuid = info.parentUuid 

1521 self.sizeVirt = info.sizeVirt 

1522 self._sizeVHD = -1 

1523 self._sizeAllocated = -1 

1524 self.drbd_size = -1 

1525 self.hidden = info.hidden 

1526 self.scanError = False 

1527 self.vdi_type = vhdutil.VDI_TYPE_VHD 

1528 

1529 def getSizeVHD(self, fetch=False): 

1530 if self._sizeVHD < 0 or fetch: 

1531 self._sizeVHD = self.sr._vhdutil.get_size_phys(self.uuid) 

1532 return self._sizeVHD 

1533 

1534 def getDrbdSize(self, fetch=False): 

1535 if self.drbd_size < 0 or fetch: 

1536 self.drbd_size = self.sr._vhdutil.get_drbd_size(self.uuid) 

1537 return self.drbd_size 

1538 

1539 def getAllocatedSize(self): 

1540 if self._sizeAllocated == -1: 

1541 if not self.raw: 

1542 self._sizeAllocated = self.sr._vhdutil.get_allocated_size(self.uuid) 

1543 return self._sizeAllocated 

1544 

1545 def inflate(self, size): 

1546 if self.raw: 

1547 return 

1548 self.sr.lock() 

1549 try: 

1550 # Ensure we use the real DRBD size and not the cached one. 

1551 # Why? Because this attribute can be changed if the volume is resized by the user. 

1552 self.drbd_size = self.getDrbdSize(fetch=True) 

1553 self.sr._vhdutil.inflate(self.sr.journaler, self.uuid, self.path, size, self.drbd_size) 

1554 finally: 

1555 self.sr.unlock() 

1556 self.drbd_size = -1 

1557 self._sizeVHD = -1 

1558 self._sizeAllocated = -1 

1559 

1560 def deflate(self): 

1561 if self.raw: 

1562 return 

1563 self.sr.lock() 

1564 try: 

1565 # Ensure we use the real sizes and not the cached info. 

1566 self.drbd_size = self.getDrbdSize(fetch=True) 

1567 self._sizeVHD = self.getSizeVHD(fetch=True) 

1568 self.sr._vhdutil.force_deflate(self.path, self._sizeVHD, self.drbd_size, zeroize=False) 

1569 finally: 

1570 self.sr.unlock() 

1571 self.drbd_size = -1 

1572 self._sizeVHD = -1 

1573 self._sizeAllocated = -1 

1574 

1575 def inflateFully(self): 

1576 if not self.raw: 

1577 self.inflate(LinstorVhdUtil.compute_volume_size(self.sizeVirt, self.vdi_type)) 

1578 

1579 def rename(self, uuid): 

1580 Util.log('Renaming {} -> {} (path={})'.format( 

1581 self.uuid, uuid, self.path 

1582 )) 

1583 self.sr._linstor.update_volume_uuid(self.uuid, uuid) 

1584 VDI.rename(self, uuid) 

1585 

1586 def delete(self): 

1587 if len(self.children) > 0: 

1588 raise util.SMException( 

1589 'VDI {} has children, can\'t delete'.format(self.uuid) 

1590 ) 

1591 self.sr.lock() 

1592 try: 

1593 self.sr._linstor.destroy_volume(self.uuid) 

1594 self.sr.forgetVDI(self.uuid) 

1595 finally: 

1596 self.sr.unlock() 

1597 VDI.delete(self) 

1598 

1599 def validate(self, fast=False): 

1600 if not self.raw and not self.sr._vhdutil.check(self.uuid, fast=fast): 

1601 raise util.SMException('VHD {} corrupted'.format(self)) 

1602 

1603 def pause(self, failfast=False): 

1604 self.sr._linstor.ensure_volume_is_not_locked( 

1605 self.uuid, timeout=self.VOLUME_LOCK_TIMEOUT 

1606 ) 

1607 return super(LinstorVDI, self).pause(failfast) 

1608 

1609 def coalesce(self): 

1610 # Note: We raise `SMException` here to skip the current coalesce in case of failure. 

1611 # With any other exception type, the subsequent coalesce calls would not be executed. 

1612 return self.sr._vhdutil.force_coalesce(self.path) * 512 

1613 

1614 def getParent(self): 

1615 return self.sr._vhdutil.get_parent( 

1616 self.sr._linstor.get_volume_uuid_from_device_path(self.path) 

1617 ) 

1618 

1619 def repair(self, parent_uuid): 

1620 self.sr._vhdutil.force_repair( 

1621 self.sr._linstor.get_device_path(parent_uuid) 

1622 ) 

1623 

1624 def _relinkSkip(self): 

1625 abortFlag = IPCFlag(self.sr.uuid) 

1626 for child in self.children: 

1627 if abortFlag.test(FLAG_TYPE_ABORT): 

1628 raise AbortException('Aborting due to signal') 

1629 Util.log( 

1630 ' Relinking {} from {} to {}'.format( 

1631 child, self, self.parent 

1632 ) 

1633 ) 

1634 

1635 session = child.sr.xapi.session 

1636 sr_uuid = child.sr.uuid 

1637 vdi_uuid = child.uuid 

1638 try: 

1639 self.sr._linstor.ensure_volume_is_not_locked( 

1640 vdi_uuid, timeout=self.VOLUME_LOCK_TIMEOUT 

1641 ) 

1642 blktap2.VDI.tap_pause(session, sr_uuid, vdi_uuid) 

1643 child._setParent(self.parent) 

1644 finally: 

1645 blktap2.VDI.tap_unpause(session, sr_uuid, vdi_uuid) 

1646 self.children = [] 

1647 

1648 def _setParent(self, parent): 

1649 self.sr._linstor.get_device_path(self.uuid) 

1650 self.sr._vhdutil.force_parent(self.path, parent.path) 

1651 self.parent = parent 

1652 self.parentUuid = parent.uuid 

1653 parent.children.append(self) 

1654 try: 

1655 self.setConfig(self.DB_VHD_PARENT, self.parentUuid) 

1656 Util.log("Updated the vhd-parent field for child %s with %s" % \ 

1657 (self.uuid, self.parentUuid)) 

1658 except: 

1659 Util.log("Failed to update %s with vhd-parent field %s" % \ 

1660 (self.uuid, self.parentUuid)) 

1661 

1662 def _doCoalesce(self): 

1663 try: 

1664 self._activateChain() 

1665 self.parent.validate() 

1666 self._inflateParentForCoalesce() 

1667 VDI._doCoalesce(self) 

1668 finally: 

1669 self.parent.deflate() 

1670 

1671 def _activateChain(self): 

1672 vdi = self 

1673 while vdi: 

1674 try: 

1675 p = self.sr._linstor.get_device_path(vdi.uuid) 

1676 except Exception as e: 

1677 # Use SMException to skip coalesce. 

1678 # Otherwise the GC is stopped... 

1679 raise util.SMException(str(e)) 

1680 vdi = vdi.parent 

1681 

1682 def _setHidden(self, hidden=True): 

1683 HIDDEN_TAG = 'hidden' 

1684 

1685 if self.raw: 

1686 self.sr._linstor.update_volume_metadata(self.uuid, { 

1687 HIDDEN_TAG: hidden 

1688 }) 

1689 self.hidden = hidden 

1690 else: 

1691 VDI._setHidden(self, hidden) 

1692 

1693 def _setSizeVirt(self, size): 

1694 jfile = self.uuid + '-jvhd' 

1695 self.sr._linstor.create_volume( 

1696 jfile, vhdutil.MAX_VHD_JOURNAL_SIZE, persistent=False, volume_name=jfile 

1697 ) 

1698 try: 

1699 self.inflate(LinstorVhdUtil.compute_volume_size(size, self.vdi_type)) 

1700 self.sr._vhdutil.set_size_virt(size, jfile) 

1701 finally: 

1702 try: 

1703 self.sr._linstor.destroy_volume(jfile) 

1704 except Exception: 

1705 # We can ignore it, in any case this volume is not persistent. 

1706 pass 

1707 

1708 def _queryVHDBlocks(self): 

1709 return self.sr._vhdutil.get_block_bitmap(self.uuid) 

1710 

1711 def _inflateParentForCoalesce(self): 

1712 if self.parent.raw: 

1713 return 

1714 inc = self._calcExtraSpaceForCoalescing() 

1715 if inc > 0: 

1716 self.parent.inflate(self.parent.getDrbdSize() + inc) 

1717 

1718 def _calcExtraSpaceForCoalescing(self): 

1719 if self.parent.raw: 

1720 return 0 

1721 size_coalesced = LinstorVhdUtil.compute_volume_size( 

1722 self._getCoalescedSizeData(), self.vdi_type 

1723 ) 

1724 Util.log("Coalesced size = %s" % Util.num2str(size_coalesced)) 

1725 return size_coalesced - self.parent.getDrbdSize() 

1726 

1727 def _calcExtraSpaceForLeafCoalescing(self): 

1728 assert self.getDrbdSize() > 0 

1729 assert self.getSizeVHD() > 0 

1730 deflate_diff = self.getDrbdSize() - LinstorVolumeManager.round_up_volume_size(self.getSizeVHD()) 

1731 assert deflate_diff >= 0 

1732 return self._calcExtraSpaceForCoalescing() - deflate_diff 

1733 

1734 def _calcExtraSpaceForSnapshotCoalescing(self): 

1735 assert self.getSizeVHD() > 0 

1736 return self._calcExtraSpaceForCoalescing() + \ 

1737 LinstorVolumeManager.round_up_volume_size(self.getSizeVHD()) 

1738 
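# Worked sketch of the three space estimates above (illustrative numbers, not
# from the original file). Assume the coalesced data needs a 10 GiB volume,
# the parent DRBD device is currently 8 GiB, and the child DRBD device is
# 6 GiB with a VHD payload that rounds up to 5 GiB:
#
#   extra_coalesce = 10 - 8 = 2 GiB   # _calcExtraSpaceForCoalescing
#   deflate_diff   =  6 - 5 = 1 GiB   # child slack reclaimable by deflating
#   extra_leaf     =  2 - 1 = 1 GiB   # _calcExtraSpaceForLeafCoalescing
#   extra_snapshot =  2 + 5 = 7 GiB   # _calcExtraSpaceForSnapshotCoalescing
#
# i.e. snapshot-coalesce must hold an extra copy of the child's data, whereas
# live leaf-coalesce can first reclaim the child's deflatable slack.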

1739################################################################################ 

1740# 

1741# SR 

1742# 

1743class SR(object): 

1744 class LogFilter: 

1745 def __init__(self, sr): 

1746 self.sr = sr 

1747 self.stateLogged = False 

1748 self.prevState = {} 

1749 self.currState = {} 

1750 

1751 def logState(self): 

1752 changes = "" 

1753 self.currState.clear() 

1754 for vdi in self.sr.vdiTrees: 

1755 self.currState[vdi.uuid] = self._getTreeStr(vdi) 

1756 if not self.prevState.get(vdi.uuid) or \ 

1757 self.prevState[vdi.uuid] != self.currState[vdi.uuid]: 

1758 changes += self.currState[vdi.uuid] 

1759 

1760 for uuid in self.prevState: 

1761 if not self.currState.get(uuid): 

1762 changes += "Tree %s gone\n" % uuid 

1763 

1764 result = "SR %s (%d VDIs in %d VHD trees): " % \ 

1765 (self.sr, len(self.sr.vdis), len(self.sr.vdiTrees)) 

1766 

1767 if len(changes) > 0: 

1768 if self.stateLogged: 

1769 result += "showing only VHD trees that changed:" 

1770 result += "\n%s" % changes 

1771 else: 

1772 result += "no changes" 

1773 

1774 for line in result.split("\n"): 

1775 Util.log("%s" % line) 

1776 self.prevState.clear() 

1777 for key, val in self.currState.items(): 

1778 self.prevState[key] = val 

1779 self.stateLogged = True 

1780 

1781 def logNewVDI(self, uuid): 

1782 if self.stateLogged: 

1783 Util.log("Found new VDI when scanning: %s" % uuid) 

1784 

1785 def _getTreeStr(self, vdi, indent=8): 

1786 treeStr = "%s%s\n" % (" " * indent, vdi) 

1787 for child in vdi.children: 

1788 treeStr += self._getTreeStr(child, indent + VDI.STR_TREE_INDENT) 

1789 return treeStr 

1790 

1791 TYPE_FILE = "file" 

1792 TYPE_LVHD = "lvhd" 

1793 TYPE_LINSTOR = "linstor" 

1794 TYPES = [TYPE_LVHD, TYPE_FILE, TYPE_LINSTOR] 

1795 

1796 LOCK_RETRY_INTERVAL = 3 

1797 LOCK_RETRY_ATTEMPTS = 20 

1798 LOCK_RETRY_ATTEMPTS_LOCK = 100 

1799 

1800 SCAN_RETRY_ATTEMPTS = 3 

1801 

1802 JRN_CLONE = "clone" # journal entry type for the clone operation (from SM) 

1803 TMP_RENAME_PREFIX = "OLD_" 

1804 

1805 KEY_OFFLINE_COALESCE_NEEDED = "leaf_coalesce_need_offline" 

1806 KEY_OFFLINE_COALESCE_OVERRIDE = "leaf_coalesce_offline_override" 

1807 

1808 def getInstance(uuid, xapiSession, createLock=True, force=False): 

1809 xapi = XAPI(xapiSession, uuid) 

1810 type = normalizeType(xapi.srRecord["type"]) 

1811 if type == SR.TYPE_FILE: 

1812 return FileSR(uuid, xapi, createLock, force) 

1813 elif type == SR.TYPE_LVHD: 

1814 return LVHDSR(uuid, xapi, createLock, force) 

1815 elif type == SR.TYPE_LINSTOR: 

1816 return LinstorSR(uuid, xapi, createLock, force) 

1817 raise util.SMException("SR type %s not recognized" % type) 

1818 getInstance = staticmethod(getInstance) 

1819 
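# Illustrative use of the factory above (a sketch only; assumes a logged-in
# XenAPI session and a valid SR UUID, both hypothetical here):
#
#   session = XenAPI.xapi_local()
#   session.xenapi.login_with_password('root', '')
#   try:
#       sr = SR.getInstance(sr_uuid, session)  # FileSR, LVHDSR or LinstorSR
#       sr.scanLocked()
#       if sr.hasWork():
#           sr.garbageCollect(dryRun=True)
#   finally:
#       session.xenapi.session.logout()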

1820 def __init__(self, uuid, xapi, createLock, force): 

1821 self.logFilter = self.LogFilter(self) 

1822 self.uuid = uuid 

1823 self.path = "" 

1824 self.name = "" 

1825 self.vdis = {} 

1826 self.vdiTrees = [] 

1827 self.journaler = None 

1828 self.xapi = xapi 

1829 self._locked = 0 

1830 self._srLock = None 

1831 if createLock:

1832 self._srLock = lock.Lock(vhdutil.LOCK_TYPE_SR, self.uuid) 

1833 else: 

1834 Util.log("Requested no SR locking") 

1835 self.name = self.xapi.srRecord["name_label"] 

1836 self._failedCoalesceTargets = [] 

1837 

1838 if not self.xapi.isPluggedHere(): 

1839 if force:

1840 Util.log("SR %s not attached on this host, ignoring" % uuid) 

1841 else: 

1842 if not self.wait_for_plug(): 

1843 raise util.SMException("SR %s not attached on this host" % uuid) 

1844 

1845 if force:

1846 Util.log("Not checking if we are Master (SR %s)" % uuid) 

1847 elif not self.xapi.isMaster():

1848 raise util.SMException("This host is NOT master, will not run") 

1849 

1850 def wait_for_plug(self): 

1851 for _ in range(1, 10): 

1852 time.sleep(2) 

1853 if self.xapi.isPluggedHere(): 

1854 return True 

1855 return False 

1856 

1857 def gcEnabled(self, refresh=True): 

1858 if refresh: 

1859 self.xapi.srRecord = \ 

1860 self.xapi.session.xenapi.SR.get_record(self.xapi._srRef) 

1861 if self.xapi.srRecord["other_config"].get(VDI.DB_GC) == "false": 

1862 Util.log("GC is disabled for this SR, abort") 

1863 return False 

1864 return True 

1865 

1866 def scan(self, force=False): 

1867 """Scan the SR and load VDI info for each VDI. If called repeatedly, 

1868 update VDI objects if they already exist""" 

1869 pass # abstract 

1870 

1871 def scanLocked(self, force=False): 

1872 self.lock() 

1873 try: 

1874 self.scan(force) 

1875 finally: 

1876 self.unlock() 

1877 

1878 def getVDI(self, uuid): 

1879 return self.vdis.get(uuid) 

1880 

1881 def hasWork(self): 

1882 if len(self.findGarbage()) > 0: 

1883 return True 

1884 if self.findCoalesceable(): 

1885 return True 

1886 if self.findLeafCoalesceable(): 

1887 return True 

1888 if self.needUpdateBlockInfo(): 

1889 return True 

1890 return False 

1891 

1892 def findCoalesceable(self): 

1893 """Find a coalesceable VDI. Return a vdi that should be coalesced 

1894 (choosing one among all coalesceable candidates according to some 

1895 criteria) or None if there is no VDI that could be coalesced""" 

1896 

1897 candidates = [] 

1898 

1899 srSwitch = self.xapi.srRecord["other_config"].get(VDI.DB_COALESCE) 

1900 if srSwitch == "false": 

1901 Util.log("Coalesce disabled for this SR") 

1902 return candidates 

1903 

1904 # finish any VDI for which a relink journal entry exists first 

1905 journals = self.journaler.getAll(VDI.JRN_RELINK) 

1906 for uuid in journals: 

1907 vdi = self.getVDI(uuid) 

1908 if vdi and vdi not in self._failedCoalesceTargets: 

1909 return vdi 

1910 

1911 for vdi in self.vdis.values(): 

1912 if vdi.isCoalesceable() and vdi not in self._failedCoalesceTargets: 

1913 candidates.append(vdi) 

1914 Util.log("%s is coalescable" % vdi.uuid) 

1915 

1916 self.xapi.update_task_progress("coalescable", len(candidates)) 

1917 

1918 # pick one in the tallest tree 

1919 treeHeight = dict() 

1920 for c in candidates: 

1921 height = c.getTreeRoot().getTreeHeight() 

1922 if treeHeight.get(height): 

1923 treeHeight[height].append(c) 

1924 else: 

1925 treeHeight[height] = [c] 

1926 

1927 freeSpace = self.getFreeSpace() 

1928 heights = list(treeHeight.keys()) 

1929 heights.sort(reverse=True) 

1930 for h in heights: 

1931 for c in treeHeight[h]: 

1932 spaceNeeded = c._calcExtraSpaceForCoalescing() 

1933 if spaceNeeded <= freeSpace: 

1934 Util.log("Coalesce candidate: %s (tree height %d)" % (c, h)) 

1935 return c 

1936 else: 

1937 Util.log("No space to coalesce %s (free space: %d)" % \ 

1938 (c, freeSpace)) 

1939 return None 

1940 
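# Selection sketch for the policy above (made-up VDIs): candidates are grouped
# by the height of their VHD tree and tried tallest-first, returning the first
# one whose _calcExtraSpaceForCoalescing() fits in the SR's free space:
#
#   treeHeight = {4: [vdi_a], 2: [vdi_b]}
#   # heights sorted descending -> vdi_a is tried first; if it needs more
#   # space than is free, the loop logs that and falls through to vdi_b.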

1941 def getSwitch(self, key): 

1942 return self.xapi.srRecord["other_config"].get(key) 

1943 

1944 def forbiddenBySwitch(self, switch, condition, fail_msg): 

1945 srSwitch = self.getSwitch(switch) 

1946 ret = False 

1947 if srSwitch: 

1948 ret = srSwitch == condition 

1949 

1950 if ret: 

1951 Util.log(fail_msg) 

1952 

1953 return ret 

1954 

1955 def leafCoalesceForbidden(self): 

1956 return (self.forbiddenBySwitch(VDI.DB_COALESCE, 

1957 "false", 

1958 "Coalesce disabled for this SR") or 

1959 self.forbiddenBySwitch(VDI.DB_LEAFCLSC, 

1960 VDI.LEAFCLSC_DISABLED, 

1961 "Leaf-coalesce disabled for this SR")) 

1962 

1963 def findLeafCoalesceable(self): 

1964 """Find leaf-coalesceable VDIs in each VHD tree""" 

1965 

1966 candidates = [] 

1967 if self.leafCoalesceForbidden(): 

1968 return candidates 

1969 

1970 self.gatherLeafCoalesceable(candidates) 

1971 

1972 self.xapi.update_task_progress("coalescable", len(candidates)) 

1973 

1974 freeSpace = self.getFreeSpace() 

1975 for candidate in candidates: 

1976 # check the space constraints to see if leaf-coalesce is actually 

1977 # feasible for this candidate 

1978 spaceNeeded = candidate._calcExtraSpaceForSnapshotCoalescing() 

1979 spaceNeededLive = spaceNeeded 

1980 if spaceNeeded > freeSpace: 

1981 spaceNeededLive = candidate._calcExtraSpaceForLeafCoalescing() 

1982 if candidate.canLiveCoalesce(self.getStorageSpeed()): 

1983 spaceNeeded = spaceNeededLive 

1984 

1985 if spaceNeeded <= freeSpace: 

1986 Util.log("Leaf-coalesce candidate: %s" % candidate) 

1987 return candidate 

1988 else: 

1989 Util.log("No space to leaf-coalesce %s (free space: %d)" % \ 

1990 (candidate, freeSpace)) 

1991 if spaceNeededLive <= freeSpace: 

1992 Util.log("...but enough space if skip snap-coalesce") 

1993 candidate.setConfig(VDI.DB_LEAFCLSC, 

1994 VDI.LEAFCLSC_OFFLINE) 

1995 

1996 return None 

1997 
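# Decision sketch for the space check above (illustrative numbers): with
# freeSpace = 4 GiB, spaceNeeded (snapshot-coalesce) = 7 GiB and
# spaceNeededLive (leaf-coalesce) = 1 GiB, a candidate that cannot
# live-coalesce is skipped online for now but flagged so the offline tool can
# handle it later:
#
#   if spaceNeeded > freeSpace and spaceNeededLive <= freeSpace:
#       candidate.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_OFFLINE)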

1998 def gatherLeafCoalesceable(self, candidates): 

1999 for vdi in self.vdis.values(): 

2000 if not vdi.isLeafCoalesceable(): 

2001 continue 

2002 if vdi in self._failedCoalesceTargets: 

2003 continue 

2004 if vdi.getConfig(vdi.DB_ONBOOT) == vdi.ONBOOT_RESET: 

2005 Util.log("Skipping reset-on-boot %s" % vdi) 

2006 continue 

2007 if vdi.getConfig(vdi.DB_ALLOW_CACHING): 

2008 Util.log("Skipping allow_caching=true %s" % vdi) 

2009 continue 

2010 if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_DISABLED: 

2011 Util.log("Leaf-coalesce disabled for %s" % vdi) 

2012 continue 

2013 if not (AUTO_ONLINE_LEAF_COALESCE_ENABLED or 

2014 vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE): 

2015 continue 

2016 candidates.append(vdi) 

2017 

2018 def coalesce(self, vdi, dryRun=False): 

2019 """Coalesce vdi onto parent""" 

2020 Util.log("Coalescing %s -> %s" % (vdi, vdi.parent)) 

2021 if dryRun:

2022 return 

2023 

2024 try: 

2025 self._coalesce(vdi) 

2026 except util.SMException as e: 

2027 if isinstance(e, AbortException):

2028 self.cleanup() 

2029 raise 

2030 else: 

2031 self._failedCoalesceTargets.append(vdi) 

2032 Util.logException("coalesce") 

2033 Util.log("Coalesce failed, skipping") 

2034 self.cleanup() 

2035 

2036 def coalesceLeaf(self, vdi, dryRun=False): 

2037 """Leaf-coalesce vdi onto parent""" 

2038 Util.log("Leaf-coalescing %s -> %s" % (vdi, vdi.parent)) 

2039 if dryRun: 

2040 return 

2041 

2042 try: 

2043 uuid = vdi.uuid 

2044 try: 

2045 # "vdi" object will no longer be valid after this call 

2046 self._coalesceLeaf(vdi) 

2047 finally: 

2048 vdi = self.getVDI(uuid) 

2049 if vdi: 

2050 vdi.delConfig(vdi.DB_LEAFCLSC) 

2051 except AbortException: 

2052 self.cleanup() 

2053 raise 

2054 except (util.SMException, XenAPI.Failure) as e: 

2055 self._failedCoalesceTargets.append(vdi) 

2056 Util.logException("leaf-coalesce") 

2057 Util.log("Leaf-coalesce failed on %s, skipping" % vdi) 

2058 self.cleanup() 

2059 

2060 def garbageCollect(self, dryRun=False): 

2061 vdiList = self.findGarbage() 

2062 Util.log("Found %d VDIs for deletion:" % len(vdiList)) 

2063 for vdi in vdiList: 

2064 Util.log(" %s" % vdi) 

2065 if not dryRun: 

2066 self.deleteVDIs(vdiList) 

2067 self.cleanupJournals(dryRun) 

2068 

2069 def findGarbage(self): 

2070 vdiList = [] 

2071 for vdi in self.vdiTrees: 

2072 vdiList.extend(vdi.getAllPrunable()) 

2073 return vdiList 

2074 

2075 def deleteVDIs(self, vdiList): 

2076 for vdi in vdiList: 

2077 if IPCFlag(self.uuid).test(FLAG_TYPE_ABORT): 

2078 raise AbortException("Aborting due to signal") 

2079 Util.log("Deleting unlinked VDI %s" % vdi) 

2080 self.deleteVDI(vdi) 

2081 

2082 def deleteVDI(self, vdi): 

2083 assert(len(vdi.children) == 0) 

2084 del self.vdis[vdi.uuid] 

2085 if vdi.parent:

2086 vdi.parent.children.remove(vdi) 

2087 if vdi in self.vdiTrees:

2088 self.vdiTrees.remove(vdi) 

2089 vdi.delete() 

2090 

2091 def forgetVDI(self, vdiUuid): 

2092 self.xapi.forgetVDI(self.uuid, vdiUuid) 

2093 

2094 def pauseVDIs(self, vdiList): 

2095 paused = [] 

2096 failed = False 

2097 for vdi in vdiList: 

2098 try: 

2099 vdi.pause() 

2100 paused.append(vdi) 

2101 except: 

2102 Util.logException("pauseVDIs") 

2103 failed = True 

2104 break 

2105 

2106 if failed: 

2107 self.unpauseVDIs(paused) 

2108 raise util.SMException("Failed to pause VDIs") 

2109 

2110 def unpauseVDIs(self, vdiList): 

2111 failed = False 

2112 for vdi in vdiList: 

2113 try: 

2114 vdi.unpause() 

2115 except: 

2116 Util.log("ERROR: Failed to unpause VDI %s" % vdi) 

2117 failed = True 

2118 if failed: 

2119 raise util.SMException("Failed to unpause VDIs") 

2120 

2121 def getFreeSpace(self): 

2122 return 0 

2123 

2124 def cleanup(self): 

2125 Util.log("In cleanup") 

2126 return 

2127 

2128 def __str__(self): 

2129 if self.name: 

2130 ret = "%s ('%s')" % (self.uuid[0:4], self.name) 

2131 else: 

2132 ret = "%s" % self.uuid 

2133 return ret 

2134 

2135 def lock(self): 

2136 """Acquire the SR lock. Nested acquire()'s are ok. Check for Abort 

2137 signal to avoid deadlocking (trying to acquire the SR lock while the 

2138 lock is held by a process that is trying to abort us)""" 

2139 if not self._srLock: 

2140 return 

2141 

2142 if self._locked == 0: 

2143 abortFlag = IPCFlag(self.uuid) 

2144 for i in range(SR.LOCK_RETRY_ATTEMPTS_LOCK): 

2145 if self._srLock.acquireNoblock(): 

2146 self._locked += 1 

2147 return 

2148 if abortFlag.test(FLAG_TYPE_ABORT): 

2149 raise AbortException("Abort requested") 

2150 time.sleep(SR.LOCK_RETRY_INTERVAL) 

2151 raise util.SMException("Unable to acquire the SR lock") 

2152 

2153 self._locked += 1 

2154 

2155 def unlock(self): 

2156 if not self._srLock:

2157 return 

2158 assert(self._locked > 0) 

2159 self._locked -= 1 

2160 if self._locked == 0: 

2161 self._srLock.release() 

2162 
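# Typical usage of the re-entrant lock()/unlock() pair above, following the
# pattern already used by scanLocked() (sketch only):
#
#   self.lock()
#   try:
#       self.scan()   # work on VDIs while holding the SR lock
#   finally:
#       self.unlock()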

2163 def needUpdateBlockInfo(self): 

2164 for vdi in self.vdis.values(): 

2165 if vdi.scanError or len(vdi.children) == 0: 

2166 continue 

2167 if not vdi.getConfig(vdi.DB_VHD_BLOCKS): 

2168 return True 

2169 return False 

2170 

2171 def updateBlockInfo(self): 

2172 for vdi in self.vdis.values(): 

2173 if vdi.scanError or len(vdi.children) == 0: 

2174 continue 

2175 if not vdi.getConfig(vdi.DB_VHD_BLOCKS): 

2176 vdi.updateBlockInfo() 

2177 

2178 def cleanupCoalesceJournals(self): 

2179 """Remove stale coalesce VDI indicators""" 

2180 entries = self.journaler.getAll(VDI.JRN_COALESCE) 

2181 for uuid, jval in entries.items(): 

2182 self.journaler.remove(VDI.JRN_COALESCE, uuid) 

2183 

2184 def cleanupJournals(self, dryRun=False): 

2185 """delete journal entries for non-existing VDIs""" 

2186 for t in [LVHDVDI.JRN_ZERO, VDI.JRN_RELINK, SR.JRN_CLONE]: 

2187 entries = self.journaler.getAll(t) 

2188 for uuid, jval in entries.items(): 

2189 if self.getVDI(uuid): 

2190 continue 

2191 if t == SR.JRN_CLONE: 

2192 baseUuid, clonUuid = jval.split("_") 

2193 if self.getVDI(baseUuid): 

2194 continue 

2195 Util.log(" Deleting stale '%s' journal entry for %s " 

2196 "(%s)" % (t, uuid, jval)) 

2197 if not dryRun: 

2198 self.journaler.remove(t, uuid) 

2199 

2200 def cleanupCache(self, maxAge=-1): 

2201 return 0 

2202 

2203 def _coalesce(self, vdi): 

2204 if self.journaler.get(vdi.JRN_RELINK, vdi.uuid):

2205 # this means we had done the actual coalescing already and just 

2206 # need to finish relinking and/or refreshing the children 

2207 Util.log("==> Coalesce apparently already done: skipping") 

2208 else: 

2209 # JRN_COALESCE is used to check which VDI is being coalesced in 

2210 # order to decide whether to abort the coalesce. We remove the 

2211 # journal as soon as the VHD coalesce step is done, because we 

2212 # don't expect the rest of the process to take long 

2213 self.journaler.create(vdi.JRN_COALESCE, vdi.uuid, "1") 

2214 vdi._doCoalesce() 

2215 self.journaler.remove(vdi.JRN_COALESCE, vdi.uuid) 

2216 

2217 util.fistpoint.activate("LVHDRT_before_create_relink_journal", self.uuid) 

2218 

2219 # we now need to relink the children: lock the SR to prevent ops 

2220 # like SM.clone from manipulating the VDIs we'll be relinking and 

2221 # rescan the SR first in case the children changed since the last 

2222 # scan 

2223 self.journaler.create(vdi.JRN_RELINK, vdi.uuid, "1") 

2224 

2225 self.lock() 

2226 try: 

2227 vdi.parent._tagChildrenForRelink() 

2228 self.scan() 

2229 vdi._relinkSkip() 

2230 finally: 

2231 self.unlock() 

2232 # Reload the children to leave things consistent 

2233 vdi.parent._reloadChildren(vdi) 

2234 

2235 self.journaler.remove(vdi.JRN_RELINK, vdi.uuid) 

2236 self.deleteVDI(vdi) 

2237 

2238 class CoalesceTracker: 

2239 GRACE_ITERATIONS = 1 

2240 MAX_ITERATIONS_NO_PROGRESS = 3 

2241 MAX_ITERATIONS = 10 

2242 MAX_INCREASE_FROM_MINIMUM = 1.2 

2243 HISTORY_STRING = "Iteration: {its} -- Initial size {initSize}" \ 

2244 " --> Final size {finSize}" 

2245 

2246 def __init__(self, sr): 

2247 self.itsNoProgress = 0 

2248 self.its = 0 

2249 self.minSize = float("inf") 

2250 self.history = [] 

2251 self.reason = "" 

2252 self.startSize = None 

2253 self.finishSize = None 

2254 self.sr = sr 

2255 

2256 def abortCoalesce(self, prevSize, curSize): 

2257 res = False 

2258 

2259 self.its += 1 

2260 self.history.append(self.HISTORY_STRING.format(its=self.its, 

2261 initSize=prevSize, 

2262 finSize=curSize)) 

2263 

2264 self.finishSize = curSize 

2265 

2266 if self.startSize is None: 

2267 self.startSize = prevSize 

2268 

2269 if curSize < self.minSize: 

2270 self.minSize = curSize 

2271 

2272 if prevSize < self.minSize: 

2273 self.minSize = prevSize 

2274 

2275 if prevSize < curSize: 

2276 self.itsNoProgress += 1 

2277 Util.log("No progress, attempt:" 

2278 " {attempt}".format(attempt=self.itsNoProgress)) 

2279 util.fistpoint.activate("cleanup_tracker_no_progress", self.sr.uuid) 

2280 

2281 if (not res) and (self.its > self.MAX_ITERATIONS): 

2282 max = self.MAX_ITERATIONS 

2283 self.reason = \ 

2284 "Max iterations ({max}) exceeded".format(max=max) 

2285 res = True 

2286 

2287 if (not res) and (self.itsNoProgress > 

2288 self.MAX_ITERATIONS_NO_PROGRESS): 

2289 max = self.MAX_ITERATIONS_NO_PROGRESS 

2290 self.reason = \ 

2291 "No progress made for {max} iterations".format(max=max) 

2292 res = True 

2293 

2294 maxSizeFromMin = self.MAX_INCREASE_FROM_MINIMUM * self.minSize 

2295 if (self.its > self.GRACE_ITERATIONS and 

2296 (not res) and (curSize > maxSizeFromMin)): 

2297 self.reason = "Unexpected bump in size," \ 

2298 " compared to minimum acheived" 

2299 res = True 

2300 

2301 return res 

2302 
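# Worked example of the abort conditions above (hypothetical sizes): with
# minSize = 10 GiB and MAX_INCREASE_FROM_MINIMUM = 1.2, any iteration after
# the GRACE_ITERATIONS-th whose final size exceeds 12 GiB aborts with
# "Unexpected bump in size". Independently, exceeding MAX_ITERATIONS (10)
# total iterations, or MAX_ITERATIONS_NO_PROGRESS (3) iterations in which the
# size grew rather than shrank, also triggers an abort.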

2303 def printReasoning(self): 

2304 Util.log("Aborted coalesce") 

2305 for hist in self.history: 

2306 Util.log(hist) 

2307 Util.log(self.reason) 

2308 Util.log("Starting size was {size}" 

2309 .format(size=self.startSize)) 

2310 Util.log("Final size was {size}" 

2311 .format(size=self.finishSize)) 

2312 Util.log("Minimum size acheived was {size}" 

2313 .format(size=self.minSize)) 

2314 

2315 def _coalesceLeaf(self, vdi): 

2316 """Leaf-coalesce VDI vdi. Return true if we succeed, false if we cannot 

2317 complete due to external changes, namely vdi_delete and vdi_snapshot 

2318 that alter leaf-coalescibility of vdi""" 

2319 tracker = self.CoalesceTracker(self) 

2320 while not vdi.canLiveCoalesce(self.getStorageSpeed()): 

2321 prevSizeVHD = vdi.getSizeVHD() 

2322 if not self._snapshotCoalesce(vdi):

2323 return False 

2324 if tracker.abortCoalesce(prevSizeVHD, vdi.getSizeVHD()): 

2325 tracker.printReasoning() 

2326 raise util.SMException("VDI {uuid} could not be coalesced" 

2327 .format(uuid=vdi.uuid)) 

2328 return self._liveLeafCoalesce(vdi) 

2329 

2330 def calcStorageSpeed(self, startTime, endTime, vhdSize): 

2331 speed = None 

2332 total_time = endTime - startTime 

2333 if total_time > 0: 

2334 speed = float(vhdSize) / float(total_time) 

2335 return speed 

2336 
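# Worked example (illustrative): a 10 GiB VHD coalesced in 50 seconds gives
#
#   speed = float(10 * 1024 ** 3) / 50.0   # ~2.15e8 bytes/s (about 205 MiB/s)
#
# A zero (or negative) elapsed time yields None so no bogus sample is recorded.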

2337 def writeSpeedToFile(self, speed): 

2338 content = [] 

2339 speedFile = None 

2340 path = SPEED_LOG_ROOT.format(uuid=self.uuid) 

2341 self.lock() 

2342 try: 

2343 Util.log("Writing to file: {myfile}".format(myfile=path)) 

2344 lines = "" 

2345 if not os.path.isfile(path): 

2346 lines = str(speed) + "\n" 

2347 else: 

2348 speedFile = open(path, "r+") 

2349 content = speedFile.readlines() 

2350 content.append(str(speed) + "\n") 

2351 if len(content) > N_RUNNING_AVERAGE: 

2352 del content[0] 

2353 lines = "".join(content) 

2354 

2355 util.atomicFileWrite(path, VAR_RUN, lines) 

2356 finally: 

2357 if speedFile is not None: 

2358 speedFile.close() 

2359 Util.log("Closing file: {myfile}".format(myfile=path)) 

2360 self.unlock() 

2361 

2362 def recordStorageSpeed(self, startTime, endTime, vhdSize): 

2363 speed = self.calcStorageSpeed(startTime, endTime, vhdSize) 

2364 if speed is None: 

2365 return 

2366 

2367 self.writeSpeedToFile(speed) 

2368 

2369 def getStorageSpeed(self): 

2370 speedFile = None 

2371 path = SPEED_LOG_ROOT.format(uuid=self.uuid) 

2372 self.lock() 

2373 try: 

2374 speed = None 

2375 if os.path.isfile(path): 

2376 speedFile = open(path) 

2377 content = speedFile.readlines() 

2378 try: 

2379 content = [float(i) for i in content] 

2380 except ValueError: 

2381 Util.log("Something bad in the speed log:{log}". 

2382 format(log=speedFile.readlines())) 

2383 return speed 

2384 

2385 if len(content): 

2386 speed = sum(content) / float(len(content)) 

2387 if speed <= 0:

2388 # Defensive, should be impossible. 

2389 Util.log("Bad speed: {speed} calculated for SR: {uuid}". 

2390 format(speed=speed, uuid=self.uuid)) 

2391 speed = None 

2392 else: 

2393 Util.log("Speed file empty for SR: {uuid}". 

2394 format(uuid=self.uuid)) 

2395 else: 

2396 Util.log("Speed log missing for SR: {uuid}". 

2397 format(uuid=self.uuid)) 

2398 return speed 

2399 finally: 

2400 if not (speedFile is None): 

2401 speedFile.close() 

2402 self.unlock() 

2403 
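# The speed log consumed above is a plain-text file (one float, in bytes per
# second, per line) capped at N_RUNNING_AVERAGE entries by writeSpeedToFile();
# getStorageSpeed() simply returns the arithmetic mean, e.g. (illustrative):
#
#   content = [2.0e8, 2.1e8, 1.9e8]
#   speed = sum(content) / float(len(content))   # 2.0e8 bytes/second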

2404 def _snapshotCoalesce(self, vdi): 

2405 # Note that because we are not holding any locks here, concurrent SM 

2406 # operations may change this tree under our feet. In particular, vdi 

2407 # can be deleted, or it can be snapshotted. 

2408 assert(AUTO_ONLINE_LEAF_COALESCE_ENABLED) 

2409 Util.log("Single-snapshotting %s" % vdi) 

2410 util.fistpoint.activate("LVHDRT_coaleaf_delay_1", self.uuid) 

2411 try: 

2412 ret = self.xapi.singleSnapshotVDI(vdi) 

2413 Util.log("Single-snapshot returned: %s" % ret) 

2414 except XenAPI.Failure as e: 

2415 if util.isInvalidVDI(e): 

2416 Util.log("The VDI appears to have been concurrently deleted") 

2417 return False 

2418 raise 

2419 self.scanLocked() 

2420 tempSnap = vdi.parent 

2421 if not tempSnap.isCoalesceable(): 

2422 Util.log("The VDI appears to have been concurrently snapshotted") 

2423 return False 

2424 Util.log("Coalescing parent %s" % tempSnap) 

2425 util.fistpoint.activate("LVHDRT_coaleaf_delay_2", self.uuid) 

2426 vhdSize = vdi.getSizeVHD() 

2427 self._coalesce(tempSnap) 

2428 if not vdi.isLeafCoalesceable(): 

2429 Util.log("The VDI tree appears to have been altered since") 

2430 return False 

2431 return True 

2432 

2433 def _liveLeafCoalesce(self, vdi): 

2434 util.fistpoint.activate("LVHDRT_coaleaf_delay_3", self.uuid) 

2435 self.lock() 

2436 try: 

2437 self.scan() 

2438 if not self.getVDI(vdi.uuid): 

2439 Util.log("The VDI appears to have been deleted meanwhile") 

2440 return False 

2441 if not vdi.isLeafCoalesceable(): 

2442 Util.log("The VDI is no longer leaf-coalesceable") 

2443 return False 

2444 

2445 uuid = vdi.uuid 

2446 vdi.pause(failfast=True) 

2447 try: 

2448 try: 

2449 # "vdi" object will no longer be valid after this call 

2450 self._doCoalesceLeaf(vdi) 

2451 except: 

2452 Util.logException("_doCoalesceLeaf") 

2453 self._handleInterruptedCoalesceLeaf() 

2454 raise 

2455 finally: 

2456 vdi = self.getVDI(uuid) 

2457 if vdi: 

2458 vdi.ensureUnpaused() 

2459 vdiOld = self.getVDI(self.TMP_RENAME_PREFIX + uuid) 

2460 if vdiOld: 

2461 util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid) 

2462 self.deleteVDI(vdiOld) 

2463 util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid) 

2464 finally: 

2465 self.cleanup() 

2466 self.unlock() 

2467 self.logFilter.logState() 

2468 return True 

2469 

2470 def _doCoalesceLeaf(self, vdi): 

2471 """Actual coalescing of a leaf VDI onto parent. Must be called in an 

2472 offline/atomic context""" 

2473 self.journaler.create(VDI.JRN_LEAF, vdi.uuid, vdi.parent.uuid) 

2474 self._prepareCoalesceLeaf(vdi) 

2475 vdi.parent._setHidden(False) 

2476 vdi.parent._increaseSizeVirt(vdi.sizeVirt, False) 

2477 vdi.validate(True) 

2478 vdi.parent.validate(True) 

2479 util.fistpoint.activate("LVHDRT_coaleaf_before_coalesce", self.uuid) 

2480 timeout = vdi.LIVE_LEAF_COALESCE_TIMEOUT 

2481 if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE: 

2482 Util.log("Leaf-coalesce forced, will not use timeout") 

2483 timeout = 0 

2484 vdi._coalesceVHD(timeout) 

2485 util.fistpoint.activate("LVHDRT_coaleaf_after_coalesce", self.uuid) 

2486 vdi.parent.validate(True) 

2487 #vdi._verifyContents(timeout / 2) 

2488 

2489 # rename 

2490 vdiUuid = vdi.uuid 

2491 oldName = vdi.fileName 

2492 origParentUuid = vdi.parent.uuid 

2493 vdi.rename(self.TMP_RENAME_PREFIX + vdiUuid) 

2494 util.fistpoint.activate("LVHDRT_coaleaf_one_renamed", self.uuid) 

2495 vdi.parent.rename(vdiUuid) 

2496 util.fistpoint.activate("LVHDRT_coaleaf_both_renamed", self.uuid) 

2497 self._updateSlavesOnRename(vdi.parent, oldName, origParentUuid) 

2498 

2499 # Note that "vdi.parent" is now the single remaining leaf and "vdi" is 

2500 # garbage 

2501 

2502 # update the VDI record 

2503 vdi.parent.delConfig(VDI.DB_VHD_PARENT) 

2504 if vdi.parent.raw: 

2505 vdi.parent.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_RAW) 

2506 vdi.parent.delConfig(VDI.DB_VHD_BLOCKS) 

2507 util.fistpoint.activate("LVHDRT_coaleaf_after_vdirec", self.uuid) 

2508 

2509 self._updateNode(vdi) 

2510 

2511 # delete the obsolete leaf & inflate the parent (in that order, to 

2512 # minimize free space requirements) 

2513 parent = vdi.parent 

2514 vdi._setHidden(True) 

2515 vdi.parent.children = [] 

2516 vdi.parent = None 

2517 

2518 extraSpace = self._calcExtraSpaceNeeded(vdi, parent) 

2519 freeSpace = self.getFreeSpace() 

2520 if freeSpace < extraSpace: 

2521 # don't delete unless we need the space: deletion is time-consuming 

2522 # because it requires contacting the slaves, and we're paused here 

2523 util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid) 

2524 self.deleteVDI(vdi) 

2525 util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid) 

2526 

2527 util.fistpoint.activate("LVHDRT_coaleaf_before_remove_j", self.uuid) 

2528 self.journaler.remove(VDI.JRN_LEAF, vdiUuid) 

2529 

2530 self.forgetVDI(origParentUuid) 

2531 self._finishCoalesceLeaf(parent) 

2532 self._updateSlavesOnResize(parent) 

2533 

2534 def _calcExtraSpaceNeeded(self, child, parent): 

2535 assert(not parent.raw) # raw parents not supported 

2536 extra = child.getSizeVHD() - parent.getSizeVHD() 

2537 if extra < 0: 

2538 extra = 0 

2539 return extra 

2540 
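# Worked example (illustrative sizes): if the obsolete leaf's VHD occupies
# 6 GiB and the surviving parent's VHD 4 GiB, the headroom estimated above is
#
#   extra = 6 GiB - 4 GiB = 2 GiB
#
# and a negative difference is clamped to zero.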

2541 def _prepareCoalesceLeaf(self, vdi): 

2542 pass 

2543 

2544 def _updateNode(self, vdi): 

2545 pass 

2546 

2547 def _finishCoalesceLeaf(self, parent): 

2548 pass 

2549 

2550 def _updateSlavesOnUndoLeafCoalesce(self, parent, child): 

2551 pass 

2552 

2553 def _updateSlavesOnRename(self, vdi, oldName, origParentUuid): 

2554 pass 

2555 

2556 def _updateSlavesOnResize(self, vdi): 

2557 pass 

2558 

2559 def _removeStaleVDIs(self, uuidsPresent): 

2560 for uuid in list(self.vdis.keys()): 

2561 if not uuid in uuidsPresent: 

2562 Util.log("VDI %s disappeared since last scan" % \ 

2563 self.vdis[uuid]) 

2564 del self.vdis[uuid] 

2565 

2566 def _handleInterruptedCoalesceLeaf(self): 

2567 """An interrupted leaf-coalesce operation may leave the VHD tree in an 

2568 inconsistent state. If the old-leaf VDI is still present, we revert the 

2569 operation (in case the original error is persistent); otherwise we must 

2570 finish the operation""" 

2571 # abstract 

2572 pass 

2573 

2574 def _buildTree(self, force): 

2575 self.vdiTrees = [] 

2576 for vdi in self.vdis.values(): 

2577 if vdi.parentUuid: 

2578 parent = self.getVDI(vdi.parentUuid) 

2579 if not parent: 

2580 if vdi.uuid.startswith(self.TMP_RENAME_PREFIX): 

2581 self.vdiTrees.append(vdi) 

2582 continue 

2583 if force: 

2584 Util.log("ERROR: Parent VDI %s not found! (for %s)" % \ 

2585 (vdi.parentUuid, vdi.uuid)) 

2586 self.vdiTrees.append(vdi) 

2587 continue 

2588 else: 

2589 raise util.SMException("Parent VDI %s of %s not " \ 

2590 "found" % (vdi.parentUuid, vdi.uuid)) 

2591 vdi.parent = parent 

2592 parent.children.append(vdi) 

2593 else: 

2594 self.vdiTrees.append(vdi) 

2595 

2596 

2597class FileSR(SR): 

2598 TYPE = SR.TYPE_FILE 

2599 CACHE_FILE_EXT = ".vhdcache" 

2600 # cache cleanup actions 

2601 CACHE_ACTION_KEEP = 0 

2602 CACHE_ACTION_REMOVE = 1 

2603 CACHE_ACTION_REMOVE_IF_INACTIVE = 2 

2604 

2605 def __init__(self, uuid, xapi, createLock, force): 

2606 SR.__init__(self, uuid, xapi, createLock, force) 

2607 self.path = "/var/run/sr-mount/%s" % self.uuid 

2608 self.journaler = fjournaler.Journaler(self.path) 

2609 

2610 def scan(self, force=False): 

2611 if not util.pathexists(self.path): 

2612 raise util.SMException("directory %s not found!" % self.uuid) 

2613 vhds = self._scan(force) 

2614 for uuid, vhdInfo in vhds.items(): 

2615 vdi = self.getVDI(uuid) 

2616 if not vdi: 

2617 self.logFilter.logNewVDI(uuid) 

2618 vdi = FileVDI(self, uuid, False) 

2619 self.vdis[uuid] = vdi 

2620 vdi.load(vhdInfo) 

2621 uuidsPresent = list(vhds.keys()) 

2622 rawList = [x for x in os.listdir(self.path) if x.endswith(vhdutil.FILE_EXTN_RAW)] 

2623 for rawName in rawList: 

2624 uuid = FileVDI.extractUuid(rawName) 

2625 uuidsPresent.append(uuid) 

2626 vdi = self.getVDI(uuid) 

2627 if not vdi: 

2628 self.logFilter.logNewVDI(uuid) 

2629 vdi = FileVDI(self, uuid, True) 

2630 self.vdis[uuid] = vdi 

2631 self._removeStaleVDIs(uuidsPresent) 

2632 self._buildTree(force) 

2633 self.logFilter.logState() 

2634 self._handleInterruptedCoalesceLeaf() 

2635 

2636 def getFreeSpace(self): 

2637 return util.get_fs_size(self.path) - util.get_fs_utilisation(self.path) 

2638 

2639 def deleteVDIs(self, vdiList): 

2640 rootDeleted = False 

2641 for vdi in vdiList: 

2642 if not vdi.parent: 

2643 rootDeleted = True 

2644 break 

2645 SR.deleteVDIs(self, vdiList) 

2646 if self.xapi.srRecord["type"] == "nfs" and rootDeleted: 

2647 self.xapi.markCacheSRsDirty() 

2648 

2649 def cleanupCache(self, maxAge=-1): 

2650 """Clean up IntelliCache cache files. Caches for leaf nodes are 

2651 removed when the leaf node no longer exists or its allow-caching 

2652 attribute is not set. Caches for parent nodes are removed when the 

2653 parent node no longer exists or it hasn't been used in more than 

2654 <maxAge> hours. 

2655 Return number of caches removed. 

2656 """ 

2657 numRemoved = 0 

2658 cacheFiles = [x for x in os.listdir(self.path) if self._isCacheFileName(x)] 

2659 Util.log("Found %d cache files" % len(cacheFiles)) 

2660 cutoff = datetime.datetime.now() - datetime.timedelta(hours=maxAge) 

2661 for cacheFile in cacheFiles: 

2662 uuid = cacheFile[:-len(self.CACHE_FILE_EXT)] 

2663 action = self.CACHE_ACTION_KEEP 

2664 rec = self.xapi.getRecordVDI(uuid) 

2665 if not rec: 

2666 Util.log("Cache %s: VDI doesn't exist" % uuid) 

2667 action = self.CACHE_ACTION_REMOVE 

2668 elif rec["managed"] and not rec["allow_caching"]: 

2669 Util.log("Cache %s: caching disabled" % uuid) 

2670 action = self.CACHE_ACTION_REMOVE 

2671 elif not rec["managed"] and maxAge >= 0: 

2672 lastAccess = datetime.datetime.fromtimestamp( \ 

2673 os.path.getatime(os.path.join(self.path, cacheFile))) 

2674 if lastAccess < cutoff: 

2675 Util.log("Cache %s: older than %d hrs" % (uuid, maxAge)) 

2676 action = self.CACHE_ACTION_REMOVE_IF_INACTIVE 

2677 

2678 if action == self.CACHE_ACTION_KEEP: 

2679 Util.log("Keeping cache %s" % uuid) 

2680 continue 

2681 

2682 lockId = uuid 

2683 parentUuid = None 

2684 if rec and rec["managed"]: 

2685 parentUuid = rec["sm_config"].get("vhd-parent") 

2686 if parentUuid: 

2687 lockId = parentUuid 

2688 

2689 cacheLock = lock.Lock(blktap2.VDI.LOCK_CACHE_SETUP, lockId) 

2690 cacheLock.acquire() 

2691 try: 

2692 if self._cleanupCache(uuid, action): 

2693 numRemoved += 1 

2694 finally: 

2695 cacheLock.release() 

2696 return numRemoved 

2697 
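# Illustrative call (sketch, 'sr' being a FileSR instance): drop IntelliCache
# files whose parent caches have not been read for 24 hours, keeping caches
# that are still in use:
#
#   removed = sr.cleanupCache(maxAge=24)
#   Util.log("Removed %d cache files" % removed)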

2698 def _cleanupCache(self, uuid, action): 

2699 assert(action != self.CACHE_ACTION_KEEP) 

2700 rec = self.xapi.getRecordVDI(uuid) 

2701 if rec and rec["allow_caching"]: 

2702 Util.log("Cache %s appears to have become valid" % uuid) 

2703 return False 

2704 

2705 fullPath = os.path.join(self.path, uuid + self.CACHE_FILE_EXT) 

2706 tapdisk = blktap2.Tapdisk.find_by_path(fullPath) 

2707 if tapdisk: 

2708 if action == self.CACHE_ACTION_REMOVE_IF_INACTIVE: 

2709 Util.log("Cache %s still in use" % uuid) 

2710 return False 

2711 Util.log("Shutting down tapdisk for %s" % fullPath) 

2712 tapdisk.shutdown() 

2713 

2714 Util.log("Deleting file %s" % fullPath) 

2715 os.unlink(fullPath) 

2716 return True 

2717 

2718 def _isCacheFileName(self, name): 

2719 return (len(name) == Util.UUID_LEN + len(self.CACHE_FILE_EXT)) and \ 

2720 name.endswith(self.CACHE_FILE_EXT) 

2721 

2722 def _scan(self, force): 

2723 for i in range(SR.SCAN_RETRY_ATTEMPTS): 

2724 error = False 

2725 pattern = os.path.join(self.path, "*%s" % vhdutil.FILE_EXTN_VHD) 

2726 vhds = vhdutil.getAllVHDs(pattern, FileVDI.extractUuid) 

2727 for uuid, vhdInfo in vhds.items(): 

2728 if vhdInfo.error: 

2729 error = True 

2730 break 

2731 if not error: 

2732 return vhds 

2733 Util.log("Scan error on attempt %d" % i) 

2734 if force: 

2735 return vhds 

2736 raise util.SMException("Scan error") 

2737 

2738 def deleteVDI(self, vdi): 

2739 self._checkSlaves(vdi) 

2740 SR.deleteVDI(self, vdi) 

2741 

2742 def _checkSlaves(self, vdi): 

2743 onlineHosts = self.xapi.getOnlineHosts() 

2744 abortFlag = IPCFlag(self.uuid) 

2745 for pbdRecord in self.xapi.getAttachedPBDs(): 

2746 hostRef = pbdRecord["host"] 

2747 if hostRef == self.xapi._hostRef: 

2748 continue 

2749 if abortFlag.test(FLAG_TYPE_ABORT): 

2750 raise AbortException("Aborting due to signal") 

2751 try: 

2752 self._checkSlave(hostRef, vdi) 

2753 except util.CommandException: 

2754 if hostRef in onlineHosts: 

2755 raise 

2756 

2757 def _checkSlave(self, hostRef, vdi): 

2758 call = (hostRef, "nfs-on-slave", "check", {'path': vdi.path}) 

2759 Util.log("Checking with slave: %s" % repr(call)) 

2760 _host = self.xapi.session.xenapi.host 

2761 text = _host.call_plugin( * call) 

2762 

2763 def _handleInterruptedCoalesceLeaf(self): 

2764 entries = self.journaler.getAll(VDI.JRN_LEAF) 

2765 for uuid, parentUuid in entries.items(): 

2766 fileList = os.listdir(self.path) 

2767 childName = uuid + vhdutil.FILE_EXTN_VHD 

2768 tmpChildName = self.TMP_RENAME_PREFIX + uuid + vhdutil.FILE_EXTN_VHD 

2769 parentName1 = parentUuid + vhdutil.FILE_EXTN_VHD 

2770 parentName2 = parentUuid + vhdutil.FILE_EXTN_RAW 

2771 parentPresent = (parentName1 in fileList or parentName2 in fileList) 

2772 if parentPresent or tmpChildName in fileList: 

2773 self._undoInterruptedCoalesceLeaf(uuid, parentUuid) 

2774 else: 

2775 self._finishInterruptedCoalesceLeaf(uuid, parentUuid) 

2776 self.journaler.remove(VDI.JRN_LEAF, uuid) 

2777 vdi = self.getVDI(uuid) 

2778 if vdi: 

2779 vdi.ensureUnpaused() 

2780 

2781 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

2782 Util.log("*** UNDO LEAF-COALESCE") 

2783 parent = self.getVDI(parentUuid) 

2784 if not parent: 

2785 parent = self.getVDI(childUuid) 

2786 if not parent: 

2787 raise util.SMException("Neither %s nor %s found" % \ 

2788 (parentUuid, childUuid)) 

2789 Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid)) 

2790 parent.rename(parentUuid) 

2791 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid) 

2792 

2793 child = self.getVDI(childUuid) 

2794 if not child: 

2795 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) 

2796 if not child: 

2797 raise util.SMException("Neither %s nor %s found" % \ 

2798 (childUuid, self.TMP_RENAME_PREFIX + childUuid)) 

2799 Util.log("Renaming child back to %s" % childUuid) 

2800 child.rename(childUuid) 

2801 Util.log("Updating the VDI record") 

2802 child.setConfig(VDI.DB_VHD_PARENT, parentUuid) 

2803 child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD) 

2804 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid) 

2805 

2806 if child.hidden: 

2807 child._setHidden(False) 

2808 if not parent.hidden: 

2809 parent._setHidden(True) 

2810 self._updateSlavesOnUndoLeafCoalesce(parent, child) 

2811 util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid) 

2812 Util.log("*** leaf-coalesce undo successful") 

2813 if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"): 

2814 child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED) 

2815 

2816 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

2817 Util.log("*** FINISH LEAF-COALESCE") 

2818 vdi = self.getVDI(childUuid) 

2819 if not vdi: 

2820 raise util.SMException("VDI %s not found" % childUuid) 

2821 try: 

2822 self.forgetVDI(parentUuid) 

2823 except XenAPI.Failure: 

2824 pass 

2825 self._updateSlavesOnResize(vdi) 

2826 util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid) 

2827 Util.log("*** finished leaf-coalesce successfully") 

2828 

2829 

2830class LVHDSR(SR): 

2831 TYPE = SR.TYPE_LVHD 

2832 SUBTYPES = ["lvhdoiscsi", "lvhdohba"] 

2833 

2834 def __init__(self, uuid, xapi, createLock, force): 

2835 SR.__init__(self, uuid, xapi, createLock, force) 

2836 self.vgName = "%s%s" % (lvhdutil.VG_PREFIX, self.uuid) 

2837 self.path = os.path.join(lvhdutil.VG_LOCATION, self.vgName) 

2838 

2839 sr_ref = self.xapi.session.xenapi.SR.get_by_uuid(self.uuid) 

2840 other_conf = self.xapi.session.xenapi.SR.get_other_config(sr_ref) 

2841 lvm_conf = other_conf.get('lvm-conf') if other_conf else None 

2842 self.lvmCache = lvmcache.LVMCache(self.vgName, lvm_conf) 

2843 

2844 self.lvActivator = LVActivator(self.uuid, self.lvmCache) 

2845 self.journaler = journaler.Journaler(self.lvmCache) 

2846 

2847 def deleteVDI(self, vdi): 

2848 if self.lvActivator.get(vdi.uuid, False): 

2849 self.lvActivator.deactivate(vdi.uuid, False) 

2850 self._checkSlaves(vdi) 

2851 SR.deleteVDI(self, vdi) 

2852 

2853 def forgetVDI(self, vdiUuid): 

2854 SR.forgetVDI(self, vdiUuid) 

2855 mdpath = os.path.join(self.path, lvutil.MDVOLUME_NAME) 

2856 LVMMetadataHandler(mdpath).deleteVdiFromMetadata(vdiUuid) 

2857 

2858 def getFreeSpace(self): 

2859 stats = lvutil._getVGstats(self.vgName) 

2860 return stats['physical_size'] - stats['physical_utilisation'] 

2861 

2862 def cleanup(self): 

2863 if not self.lvActivator.deactivateAll(): 

2864 Util.log("ERROR deactivating LVs while cleaning up") 

2865 

2866 def needUpdateBlockInfo(self): 

2867 for vdi in self.vdis.values(): 

2868 if vdi.scanError or vdi.raw or len(vdi.children) == 0: 

2869 continue 

2870 if not vdi.getConfig(vdi.DB_VHD_BLOCKS): 

2871 return True 

2872 return False 

2873 

2874 def updateBlockInfo(self): 

2875 numUpdated = 0 

2876 for vdi in self.vdis.values(): 

2877 if vdi.scanError or vdi.raw or len(vdi.children) == 0: 

2878 continue 

2879 if not vdi.getConfig(vdi.DB_VHD_BLOCKS): 

2880 vdi.updateBlockInfo() 

2881 numUpdated += 1 

2882 if numUpdated: 

2883 # deactivate the LVs back sooner rather than later. If we don't 

2884 # now, by the time this thread gets to deactivations, another one 

2885 # might have leaf-coalesced a node and deleted it, making the child 

2886 # inherit the refcount value and preventing the correct decrement 

2887 self.cleanup() 

2888 

2889 def scan(self, force=False): 

2890 vdis = self._scan(force) 

2891 for uuid, vdiInfo in vdis.items(): 

2892 vdi = self.getVDI(uuid) 

2893 if not vdi: 

2894 self.logFilter.logNewVDI(uuid) 

2895 vdi = LVHDVDI(self, uuid, 

2896 vdiInfo.vdiType == vhdutil.VDI_TYPE_RAW) 

2897 self.vdis[uuid] = vdi 

2898 vdi.load(vdiInfo) 

2899 self._removeStaleVDIs(vdis.keys()) 

2900 self._buildTree(force) 

2901 self.logFilter.logState() 

2902 self._handleInterruptedCoalesceLeaf() 

2903 

2904 def _scan(self, force): 

2905 for i in range(SR.SCAN_RETRY_ATTEMPTS): 

2906 error = False 

2907 self.lvmCache.refresh() 

2908 vdis = lvhdutil.getVDIInfo(self.lvmCache) 

2909 for uuid, vdiInfo in vdis.items(): 

2910 if vdiInfo.scanError: 

2911 error = True 

2912 break 

2913 if not error: 

2914 return vdis 

2915 Util.log("Scan error, retrying (%d)" % i) 

2916 if force: 

2917 return vdis 

2918 raise util.SMException("Scan error") 

2919 

2920 def _removeStaleVDIs(self, uuidsPresent): 

2921 for uuid in list(self.vdis.keys()): 

2922 if not uuid in uuidsPresent: 

2923 Util.log("VDI %s disappeared since last scan" % \ 

2924 self.vdis[uuid]) 

2925 del self.vdis[uuid] 

2926 if self.lvActivator.get(uuid, False): 

2927 self.lvActivator.remove(uuid, False) 

2928 

2929 def _liveLeafCoalesce(self, vdi): 

2930 """If the parent is raw and the child was resized (virt. size), then 

2931 we'll need to resize the parent, which can take a while due to zeroing 

2932 out of the extended portion of the LV. Do it before pausing the child 

2933 to avoid a protracted downtime""" 

2934 if vdi.parent.raw and vdi.sizeVirt > vdi.parent.sizeVirt: 

2935 self.lvmCache.setReadonly(vdi.parent.fileName, False) 

2936 vdi.parent._increaseSizeVirt(vdi.sizeVirt) 

2937 

2938 return SR._liveLeafCoalesce(self, vdi) 

2939 

2940 def _prepareCoalesceLeaf(self, vdi): 

2941 vdi._activateChain() 

2942 self.lvmCache.setReadonly(vdi.parent.fileName, False) 

2943 vdi.deflate() 

2944 vdi.inflateParentForCoalesce() 

2945 

2946 def _updateNode(self, vdi): 

2947 # fix the refcounts: the remaining node should inherit the binary 

2948 # refcount from the leaf (because if it was online, it should remain 

2949 # refcounted as such), but the normal refcount from the parent (because 

2950 # this node is really the parent node) - minus 1 if it is online (since 

2951 # non-leaf nodes increment their normal counts when they are online and 

2952 # we are now a leaf, storing that 1 in the binary refcount). 

2953 ns = lvhdutil.NS_PREFIX_LVM + self.uuid 

2954 cCnt, cBcnt = RefCounter.check(vdi.uuid, ns) 

2955 pCnt, pBcnt = RefCounter.check(vdi.parent.uuid, ns) 

2956 pCnt = pCnt - cBcnt 

2957 assert(pCnt >= 0) 

2958 RefCounter.set(vdi.parent.uuid, pCnt, cBcnt, ns) 

2959 
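# Worked example of the refcount fix-up above (hypothetical counts): if the
# leaf had (cCnt=0, cBcnt=1) because it was attached, and the parent had
# (pCnt=3, pBcnt=0), the surviving node ends up with
#
#   RefCounter.set(vdi.parent.uuid, 3 - 1, 1, ns)   # normal=2, binary=1
#
# i.e. the node keeps being counted as online, but via the binary count (as a
# leaf) rather than via the extra normal count it held as a non-leaf node.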

2960 def _finishCoalesceLeaf(self, parent): 

2961 if not parent.isSnapshot() or parent.isAttachedRW(): 

2962 parent.inflateFully() 

2963 else: 

2964 parent.deflate() 

2965 

2966 def _calcExtraSpaceNeeded(self, child, parent): 

2967 return lvhdutil.calcSizeVHDLV(parent.sizeVirt) - parent.sizeLV 

2968 

2969 def _handleInterruptedCoalesceLeaf(self): 

2970 entries = self.journaler.getAll(VDI.JRN_LEAF) 

2971 for uuid, parentUuid in entries.items(): 

2972 childLV = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + uuid 

2973 tmpChildLV = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + \ 

2974 self.TMP_RENAME_PREFIX + uuid 

2975 parentLV1 = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + parentUuid 

2976 parentLV2 = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_RAW] + parentUuid 

2977 parentPresent = (self.lvmCache.checkLV(parentLV1) or \ 

2978 self.lvmCache.checkLV(parentLV2)) 

2979 if parentPresent or self.lvmCache.checkLV(tmpChildLV): 

2980 self._undoInterruptedCoalesceLeaf(uuid, parentUuid) 

2981 else: 

2982 self._finishInterruptedCoalesceLeaf(uuid, parentUuid) 

2983 self.journaler.remove(VDI.JRN_LEAF, uuid) 

2984 vdi = self.getVDI(uuid) 

2985 if vdi: 

2986 vdi.ensureUnpaused() 

2987 

2988 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

2989 Util.log("*** UNDO LEAF-COALESCE") 

2990 parent = self.getVDI(parentUuid) 

2991 if not parent: 

2992 parent = self.getVDI(childUuid) 

2993 if not parent: 

2994 raise util.SMException("Neither %s nor %s found" % \ 

2995 (parentUuid, childUuid)) 

2996 Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid)) 

2997 parent.rename(parentUuid) 

2998 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid) 

2999 

3000 child = self.getVDI(childUuid) 

3001 if not child: 

3002 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) 

3003 if not child: 

3004 raise util.SMException("Neither %s nor %s found" % \ 

3005 (childUuid, self.TMP_RENAME_PREFIX + childUuid)) 

3006 Util.log("Renaming child back to %s" % childUuid) 

3007 child.rename(childUuid) 

3008 Util.log("Updating the VDI record") 

3009 child.setConfig(VDI.DB_VHD_PARENT, parentUuid) 

3010 child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD) 

3011 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid) 

3012 

3013 # refcount (best effort - assume that it had succeeded if the 

3014 # second rename succeeded; if not, this adjustment will be wrong, 

3015 # leading to a non-deactivation of the LV) 

3016 ns = lvhdutil.NS_PREFIX_LVM + self.uuid 

3017 cCnt, cBcnt = RefCounter.check(child.uuid, ns) 

3018 pCnt, pBcnt = RefCounter.check(parent.uuid, ns) 

3019 pCnt = pCnt + cBcnt 

3020 RefCounter.set(parent.uuid, pCnt, 0, ns) 

3021 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_refcount", self.uuid) 

3022 

3023 parent.deflate() 

3024 child.inflateFully() 

3025 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_deflate", self.uuid) 

3026 if child.hidden: 

3027 child._setHidden(False) 

3028 if not parent.hidden: 

3029 parent._setHidden(True) 

3030 if not parent.lvReadonly: 

3031 self.lvmCache.setReadonly(parent.fileName, True) 

3032 self._updateSlavesOnUndoLeafCoalesce(parent, child) 

3033 util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid) 

3034 Util.log("*** leaf-coalesce undo successful") 

3035 if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"): 

3036 child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED) 

3037 

3038 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3039 Util.log("*** FINISH LEAF-COALESCE") 

3040 vdi = self.getVDI(childUuid) 

3041 if not vdi: 

3042 raise util.SMException("VDI %s not found" % childUuid) 

3043 vdi.inflateFully() 

3044 util.fistpoint.activate("LVHDRT_coaleaf_finish_after_inflate", self.uuid) 

3045 try: 

3046 self.forgetVDI(parentUuid) 

3047 except XenAPI.Failure: 

3048 pass 

3049 self._updateSlavesOnResize(vdi) 

3050 util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid) 

3051 Util.log("*** finished leaf-coalesce successfully") 

3052 

3053 def _checkSlaves(self, vdi): 

3054 """Confirm with all slaves in the pool that 'vdi' is not in use. We 

3055 try to check all slaves, including those that the Agent believes are 

3056 offline, but ignore failures for offline hosts. This is to avoid cases 

3057 where the Agent thinks a host is offline but the host is up.""" 

3058 args = {"vgName": self.vgName, 

3059 "action1": "deactivateNoRefcount", 

3060 "lvName1": vdi.fileName, 

3061 "action2": "cleanupLockAndRefcount", 

3062 "uuid2": vdi.uuid, 

3063 "ns2": lvhdutil.NS_PREFIX_LVM + self.uuid} 

3064 onlineHosts = self.xapi.getOnlineHosts() 

3065 abortFlag = IPCFlag(self.uuid) 

3066 for pbdRecord in self.xapi.getAttachedPBDs(): 

3067 hostRef = pbdRecord["host"] 

3068 if hostRef == self.xapi._hostRef: 

3069 continue 

3070 if abortFlag.test(FLAG_TYPE_ABORT): 

3071 raise AbortException("Aborting due to signal") 

3072 Util.log("Checking with slave %s (path %s)" % ( 

3073 self.xapi.getRecordHost(hostRef)['hostname'], vdi.path)) 

3074 try: 

3075 self.xapi.ensureInactive(hostRef, args) 

3076 except XenAPI.Failure: 

3077 if hostRef in onlineHosts: 

3078 raise 

3079 

3080 def _updateSlavesOnUndoLeafCoalesce(self, parent, child): 

3081 slaves = util.get_slaves_attached_on(self.xapi.session, [child.uuid]) 

3082 if not slaves: 

3083 Util.log("Update-on-leaf-undo: VDI %s not attached on any slave" % \ 

3084 child) 

3085 return 

3086 

3087 tmpName = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + \ 

3088 self.TMP_RENAME_PREFIX + child.uuid 

3089 args = {"vgName": self.vgName, 

3090 "action1": "deactivateNoRefcount", 

3091 "lvName1": tmpName, 

3092 "action2": "deactivateNoRefcount", 

3093 "lvName2": child.fileName, 

3094 "action3": "refresh", 

3095 "lvName3": child.fileName, 

3096 "action4": "refresh", 

3097 "lvName4": parent.fileName} 

3098 for slave in slaves: 

3099 Util.log("Updating %s, %s, %s on slave %s" % \ 

3100 (tmpName, child.fileName, parent.fileName, 

3101 self.xapi.getRecordHost(slave)['hostname'])) 

3102 text = self.xapi.session.xenapi.host.call_plugin( \ 

3103 slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args) 

3104 Util.log("call-plugin returned: '%s'" % text) 

3105 

3106 def _updateSlavesOnRename(self, vdi, oldNameLV, origParentUuid): 

3107 slaves = util.get_slaves_attached_on(self.xapi.session, [vdi.uuid]) 

3108 if not slaves: 

3109 Util.log("Update-on-rename: VDI %s not attached on any slave" % vdi) 

3110 return 

3111 

3112 args = {"vgName": self.vgName, 

3113 "action1": "deactivateNoRefcount", 

3114 "lvName1": oldNameLV, 

3115 "action2": "refresh", 

3116 "lvName2": vdi.fileName, 

3117 "action3": "cleanupLockAndRefcount", 

3118 "uuid3": origParentUuid, 

3119 "ns3": lvhdutil.NS_PREFIX_LVM + self.uuid} 

3120 for slave in slaves: 

3121 Util.log("Updating %s to %s on slave %s" % \ 

3122 (oldNameLV, vdi.fileName, 

3123 self.xapi.getRecordHost(slave)['hostname'])) 

3124 text = self.xapi.session.xenapi.host.call_plugin( \ 

3125 slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args) 

3126 Util.log("call-plugin returned: '%s'" % text) 

3127 

3128 def _updateSlavesOnResize(self, vdi): 

3129 uuids = [x.uuid for x in vdi.getAllLeaves()] 

3130 slaves = util.get_slaves_attached_on(self.xapi.session, uuids) 

3131 if not slaves: 

3132 util.SMlog("Update-on-resize: %s not attached on any slave" % vdi) 

3133 return 

3134 lvhdutil.lvRefreshOnSlaves(self.xapi.session, self.uuid, self.vgName, 

3135 vdi.fileName, vdi.uuid, slaves) 

3136 

3137 

3138class LinstorSR(SR): 

3139 TYPE = SR.TYPE_LINSTOR 

3140 

3141 def __init__(self, uuid, xapi, createLock, force): 

3142 if not LINSTOR_AVAILABLE: 

3143 raise util.SMException( 

3144 'Can\'t load cleanup LinstorSR: LINSTOR libraries are missing' 

3145 ) 

3146 

3147 SR.__init__(self, uuid, xapi, createLock, force) 

3148 self.path = LinstorVolumeManager.DEV_ROOT_PATH 

3149 self._reloadLinstor() 

3150 

3151 def deleteVDI(self, vdi): 

3152 self._checkSlaves(vdi) 

3153 SR.deleteVDI(self, vdi) 

3154 

3155 def getFreeSpace(self): 

3156 return self._linstor.max_volume_size_allowed 

3157 

3158 def scan(self, force=False): 

3159 all_vdi_info = self._scan(force) 

3160 for uuid, vdiInfo in all_vdi_info.items(): 

3161 # When vdiInfo is None, the VDI is RAW. 

3162 vdi = self.getVDI(uuid) 

3163 if not vdi: 

3164 self.logFilter.logNewVDI(uuid) 

3165 vdi = LinstorVDI(self, uuid, not vdiInfo) 

3166 self.vdis[uuid] = vdi 

3167 if vdiInfo: 

3168 vdi.load(vdiInfo) 

3169 self._removeStaleVDIs(all_vdi_info.keys()) 

3170 self._buildTree(force) 

3171 self.logFilter.logState() 

3172 self._handleInterruptedCoalesceLeaf() 

3173 

3174 def pauseVDIs(self, vdiList): 

3175 self._linstor.ensure_volume_list_is_not_locked( 

3176 vdiList, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT 

3177 ) 

3178 return super(LinstorSR, self).pauseVDIs(vdiList) 

3179 

3180 def _reloadLinstor(self): 

3181 session = self.xapi.session 

3182 host_ref = util.get_this_host_ref(session) 

3183 sr_ref = session.xenapi.SR.get_by_uuid(self.uuid) 

3184 

3185 pbd = util.find_my_pbd(session, host_ref, sr_ref) 

3186 if pbd is None: 

3187 raise util.SMException('Failed to find PBD') 

3188 

3189 dconf = session.xenapi.PBD.get_device_config(pbd) 

3190 group_name = dconf['group-name'] 

3191 

3192 controller_uri = get_controller_uri() 

3193 self.journaler = LinstorJournaler( 

3194 controller_uri, group_name, logger=util.SMlog 

3195 ) 

3196 

3197 self._linstor = LinstorVolumeManager( 

3198 controller_uri, 

3199 group_name, 

3200 repair=True, 

3201 logger=util.SMlog 

3202 ) 

3203 self._vhdutil = LinstorVhdUtil(session, self._linstor) 

3204 

3205 def _scan(self, force): 

3206 for i in range(SR.SCAN_RETRY_ATTEMPTS): 

3207 self._reloadLinstor() 

3208 error = False 

3209 try: 

3210 all_vdi_info = self._load_vdi_info() 

3211 for uuid, vdiInfo in all_vdi_info.items(): 

3212 if vdiInfo and vdiInfo.error: 

3213 error = True 

3214 break 

3215 if not error: 

3216 return all_vdi_info 

3217 Util.log('Scan error, retrying ({})'.format(i)) 

3218 except Exception as e: 

3219 Util.log('Scan exception, retrying ({}): {}'.format(i, e)) 

3220 Util.log(traceback.format_exc()) 

3221 

3222 if force: 

3223 return all_vdi_info 

3224 raise util.SMException('Scan error') 

3225 

3226 def _load_vdi_info(self): 

3227 all_vdi_info = {} 

3228 

3229 # TODO: Ensure metadata contains the right info. 

3230 

3231 all_volume_info = self._linstor.get_volumes_with_info() 

3232 volumes_metadata = self._linstor.get_volumes_with_metadata() 

3233 for vdi_uuid, volume_info in all_volume_info.items(): 

3234 try: 

3235 volume_metadata = volumes_metadata[vdi_uuid] 

3236 if not volume_info.name and not list(volume_metadata.items()): 

3237 continue # Ignore it, probably deleted. 

3238 

3239 if vdi_uuid.startswith('DELETED_'): 

3240 # Assume it is really a RAW volume left over from a failed snapshot, i.e. one

3241 # without a VHD header/footer. We must remove this VDI now, without adding it

3242 # to the VDI list; otherwise `Relinking` calls and other actions could be

3243 # launched on it, which we don't want.

3244 Util.log('Deleting bad VDI {}'.format(vdi_uuid)) 

3245 

3246 self.lock() 

3247 try: 

3248 self._linstor.destroy_volume(vdi_uuid) 

3249 try: 

3250 self.forgetVDI(vdi_uuid) 

3251 except: 

3252 pass 

3253 except Exception as e: 

3254 Util.log('Cannot delete bad VDI: {}'.format(e)) 

3255 finally: 

3256 self.unlock() 

3257 continue 

3258 

3259 vdi_type = volume_metadata.get(VDI_TYPE_TAG) 

3260 volume_name = self._linstor.get_volume_name(vdi_uuid) 

3261 if volume_name.startswith(LINSTOR_PERSISTENT_PREFIX): 

3262 # Always RAW! 

3263 info = None 

3264 elif vdi_type == vhdutil.VDI_TYPE_VHD: 

3265 info = self._vhdutil.get_vhd_info(vdi_uuid) 

3266 else: 

3267 # Ensure it's not a VHD... 

3268 try: 

3269 info = self._vhdutil.get_vhd_info(vdi_uuid) 

3270 except: 

3271 try: 

3272 self._vhdutil.force_repair( 

3273 self._linstor.get_device_path(vdi_uuid) 

3274 ) 

3275 info = self._vhdutil.get_vhd_info(vdi_uuid) 

3276 except: 

3277 info = None 

3278 

3279 except Exception as e: 

3280 Util.log( 

3281 ' [VDI {}: failed to load VDI info]: {}' 

3282 .format(vdi_uuid, e) 

3283 ) 

3284 info = vhdutil.VHDInfo(vdi_uuid) 

3285 info.error = 1 

3286 

3287 all_vdi_info[vdi_uuid] = info 

3288 

3289 return all_vdi_info 

3290 

3291 def _prepareCoalesceLeaf(self, vdi): 

3292 vdi._activateChain() 

3293 vdi.deflate() 

3294 vdi._inflateParentForCoalesce() 

3295 

3296 def _finishCoalesceLeaf(self, parent): 

3297 if not parent.isSnapshot() or parent.isAttachedRW(): 

3298 parent.inflateFully() 

3299 else: 

3300 parent.deflate() 

3301 

3302 def _calcExtraSpaceNeeded(self, child, parent): 

3303 return LinstorVhdUtil.compute_volume_size(parent.sizeVirt, parent.vdi_type) - parent.getDrbdSize() 

3304 

3305 def _hasValidDevicePath(self, uuid): 

3306 try: 

3307 self._linstor.get_device_path(uuid) 

3308 except Exception: 

3309 # TODO: Maybe log exception. 

3310 return False 

3311 return True 

3312 

3313 def _liveLeafCoalesce(self, vdi): 

3314 self.lock() 

3315 try: 

3316 self._linstor.ensure_volume_is_not_locked( 

3317 vdi.uuid, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT 

3318 ) 

3319 return super(LinstorSR, self)._liveLeafCoalesce(vdi) 

3320 finally: 

3321 self.unlock() 

3322 

3323 def _handleInterruptedCoalesceLeaf(self): 

3324 entries = self.journaler.get_all(VDI.JRN_LEAF) 

3325 for uuid, parentUuid in entries.items(): 

3326 if self._hasValidDevicePath(parentUuid) or \ 

3327 self._hasValidDevicePath(self.TMP_RENAME_PREFIX + uuid): 

3328 self._undoInterruptedCoalesceLeaf(uuid, parentUuid) 

3329 else: 

3330 self._finishInterruptedCoalesceLeaf(uuid, parentUuid) 

3331 self.journaler.remove(VDI.JRN_LEAF, uuid) 

3332 vdi = self.getVDI(uuid) 

3333 if vdi: 

3334 vdi.ensureUnpaused() 

3335 

3336 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3337 Util.log('*** UNDO LEAF-COALESCE') 

3338 parent = self.getVDI(parentUuid) 

3339 if not parent: 

3340 parent = self.getVDI(childUuid) 

3341 if not parent: 

3342 raise util.SMException( 

3343 'Neither {} nor {} found'.format(parentUuid, childUuid) 

3344 ) 

3345 Util.log( 

3346 'Renaming parent back: {} -> {}'.format(childUuid, parentUuid) 

3347 ) 

3348 parent.rename(parentUuid) 

3349 

3350 child = self.getVDI(childUuid) 

3351 if not child: 

3352 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) 

3353 if not child: 

3354 raise util.SMException( 

3355 'Neither {} nor {} found'.format( 

3356 childUuid, self.TMP_RENAME_PREFIX + childUuid 

3357 ) 

3358 ) 

3359 Util.log('Renaming child back to {}'.format(childUuid)) 

3360 child.rename(childUuid) 

3361 Util.log('Updating the VDI record') 

3362 child.setConfig(VDI.DB_VHD_PARENT, parentUuid) 

3363 child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD) 

3364 

3365 # TODO: Maybe deflate here. 

3366 

3367 if child.hidden: 

3368 child._setHidden(False) 

3369 if not parent.hidden: 

3370 parent._setHidden(True) 

3371 self._updateSlavesOnUndoLeafCoalesce(parent, child) 

3372 Util.log('*** leaf-coalesce undo successful') 

3373 

3374 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3375 Util.log('*** FINISH LEAF-COALESCE') 

3376 vdi = self.getVDI(childUuid) 

3377 if not vdi: 

3378 raise util.SMException('VDI {} not found'.format(childUuid)) 

3379 # TODO: Maybe inflate. 

3380 try: 

3381 self.forgetVDI(parentUuid) 

3382 except XenAPI.Failure: 

3383 pass 

3384 self._updateSlavesOnResize(vdi) 

3385 Util.log('*** finished leaf-coalesce successfully') 

3386 

3387 def _checkSlaves(self, vdi): 

3388 try: 

3389 all_openers = self._linstor.get_volume_openers(vdi.uuid) 

3390 for openers in all_openers.values(): 

3391 for opener in openers.values(): 

3392 if opener['process-name'] != 'tapdisk': 

3393 raise util.SMException( 

3394 'VDI {} is in use: {}'.format(vdi.uuid, all_openers) 

3395 ) 

3396 except LinstorVolumeManagerError as e: 

3397 if e.code != LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS: 

3398 raise 

3399 

3400 

3401################################################################################ 

3402# 

3403# Helpers 

3404# 

3405def daemonize(): 

3406 pid = os.fork() 

3407 if pid:

3408 os.waitpid(pid, 0) 

3409 Util.log("New PID [%d]" % pid) 

3410 return False 

3411 os.chdir("/") 

3412 os.setsid() 

3413 pid = os.fork() 

3414 if pid: 

3415 Util.log("Will finish as PID [%d]" % pid) 

3416 os._exit(0) 

3417 for fd in [0, 1, 2]: 

3418 try: 

3419 os.close(fd) 

3420 except OSError: 

3421 pass 

3422 # we need to reopen these special fd numbers (0, 1, 2) or pread won't work

3423 sys.stdin = open("/dev/null", 'r') 

3424 sys.stderr = open("/dev/null", 'w') 

3425 sys.stdout = open("/dev/null", 'w') 

3426 # As we're a new process we need to clear the lock objects 

3427 lock.Lock.clearAll() 

3428 return True 

3429 
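For reference, the usual calling pattern for the helper above, as a minimal sketch; gc() further down uses exactly this shape.

# Sketch: daemonize() returns False in the original process and True in the
# detached grandchild, which must finish with os._exit().
if daemonize():
    try:
        pass  # long-running background work goes here
    finally:
        os._exit(0)
# In the original process daemonize() returned False, so execution simply continues.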

3430 

3431def normalizeType(type): 

3432 if type in LVHDSR.SUBTYPES: 

3433 type = SR.TYPE_LVHD 

3434 if type in ["lvm", "lvmoiscsi", "lvmohba", "lvmofcoe"]: 

3435 # temporary while LVHD is symlinked as LVM 

3436 type = SR.TYPE_LVHD 

3437 if type in [ 

3438 "ext", "nfs", "ocfsoiscsi", "ocfsohba", "smb", "cephfs", "glusterfs", 

3439 "moosefs", "xfs", "zfs", "largeblock" 

3440 ]: 

3441 type = SR.TYPE_FILE 

3442 if type in ["linstor"]: 

3443 type = SR.TYPE_LINSTOR 

3444 if type not in SR.TYPES: 

3445 raise util.SMException("Unsupported SR type: %s" % type) 

3446 return type 

3447 

3448GCPAUSE_DEFAULT_SLEEP = 5 * 60 

3449 

3450 

3451def _gc_init_file(sr_uuid): 

3452 return os.path.join(NON_PERSISTENT_DIR, str(sr_uuid), 'gc_init') 

3453 

3454 

3455def _create_init_file(sr_uuid): 

3456 util.makedirs(os.path.join(NON_PERSISTENT_DIR, str(sr_uuid))) 

3457 with open(os.path.join( 

3458 NON_PERSISTENT_DIR, str(sr_uuid), 'gc_init'), 'w+') as f: 

3459 f.write('1') 

3460 

3461 

3462def _gcLoopPause(sr, dryRun=False, immediate=False): 

3463 if immediate: 

3464 return 

3465 

3466 # Check to see if the GCPAUSE_FISTPOINT is present. If so, the fist point will just return.

3467 # Otherwise, if this SR's gc_init file already exists, fall back on an abortable sleep.

3468 

3469 if util.fistpoint.is_active(util.GCPAUSE_FISTPOINT): 

3470 

3471 util.fistpoint.activate_custom_fn(util.GCPAUSE_FISTPOINT,

3472 lambda *args: None) 

3473 elif os.path.exists(_gc_init_file(sr.uuid)): 

3474 def abortTest(): 

3475 return IPCFlag(sr.uuid).test(FLAG_TYPE_ABORT) 

3476 

3477 # If time.sleep hangs we are in deep trouble; however, for

3478 # completeness we set the timeout of the abort thread to 

3479 # 110% of GCPAUSE_DEFAULT_SLEEP. 

3480 Util.log("GC active, about to go quiet") 

3481 Util.runAbortable(lambda: time.sleep(GCPAUSE_DEFAULT_SLEEP),

3482 None, sr.uuid, abortTest, VDI.POLL_INTERVAL, 

3483 GCPAUSE_DEFAULT_SLEEP * 1.1) 

3484 Util.log("GC active, quiet period ended") 

3485 

3486 

3487def _gcLoop(sr, dryRun=False, immediate=False): 

3488 if not lockActive.acquireNoblock():

3489 Util.log("Another GC instance already active, exiting") 

3490 return 

3491 # Track how many we do 

3492 coalesced = 0 

3493 task_status = "success" 

3494 try: 

3495 # Check if any work needs to be done 

3496 if not sr.xapi.isPluggedHere(): 

3497 Util.log("SR no longer attached, exiting") 

3498 return 

3499 sr.scanLocked() 

3500 if not sr.hasWork(): 

3501 Util.log("No work, exiting") 

3502 return 

3503 sr.xapi.create_task( 

3504 "Garbage Collection", 

3505 "Garbage collection for SR %s" % sr.uuid) 

3506 _gcLoopPause(sr, dryRun, immediate=immediate) 

3507 while True: 

3508 if not sr.xapi.isPluggedHere():

3509 Util.log("SR no longer attached, exiting") 

3510 break 

3511 sr.scanLocked() 

3512 if not sr.hasWork(): 

3513 Util.log("No work, exiting") 

3514 break 

3515 

3516 if not lockRunning.acquireNoblock():

3517 Util.log("Unable to acquire GC running lock.") 

3518 return 

3519 try: 

3520 if not sr.gcEnabled():

3521 break 

3522 

3523 sr.xapi.update_task_progress("done", coalesced) 

3524 

3525 sr.cleanupCoalesceJournals() 

3526 # Create the init file here in case startup is waiting on it 

3527 _create_init_file(sr.uuid) 

3528 sr.scanLocked() 

3529 sr.updateBlockInfo() 

3530 

3531 howmany = len(sr.findGarbage()) 

3532 if howmany > 0: 

3533 Util.log("Found %d orphaned vdis" % howmany) 

3534 sr.lock() 

3535 try: 

3536 sr.garbageCollect(dryRun) 

3537 finally: 

3538 sr.unlock() 

3539 sr.xapi.srUpdate() 

3540 

3541 candidate = sr.findCoalesceable() 

3542 if candidate: 

3543 util.fistpoint.activate( 

3544 "LVHDRT_finding_a_suitable_pair", sr.uuid) 

3545 sr.coalesce(candidate, dryRun) 

3546 sr.xapi.srUpdate() 

3547 coalesced += 1 

3548 continue 

3549 

3550 candidate = sr.findLeafCoalesceable() 

3551 if candidate:

3552 sr.coalesceLeaf(candidate, dryRun) 

3553 sr.xapi.srUpdate() 

3554 coalesced += 1 

3555 continue 

3556 

3557 finally: 

3558 lockRunning.release()

3559 except: 

3560 task_status = "failure" 

3561 raise 

3562 finally: 

3563 sr.xapi.set_task_status(task_status) 

3564 Util.log("GC process exiting, no work left") 

3565 _create_init_file(sr.uuid) 

3566 lockActive.release() 

3567 

3568 

3569def _xapi_enabled(session, hostref): 

3570 host = session.xenapi.host.get_record(hostref) 

3571 return host['enabled'] 

3572 

3573 

3574def _ensure_xapi_initialised(session): 

3575 """ 

3576 Don't want to start GC until Xapi is fully initialised 

3577 """ 

3578 local_session = None 

3579 if session is None: 

3580 local_session = util.get_localAPI_session() 

3581 session = local_session 

3582 

3583 try: 

3584 hostref = session.xenapi.host.get_by_uuid(util.get_this_host()) 

3585 while not _xapi_enabled(session, hostref): 

3586 util.SMlog("Xapi not ready, GC waiting") 

3587 time.sleep(15) 

3588 finally: 

3589 if local_session is not None: 

3590 local_session.xenapi.session.logout() 

3591 

3592def _gc(session, srUuid, dryRun=False, immediate=False): 

3593 init(srUuid) 

3594 _ensure_xapi_initialised(session) 

3595 sr = SR.getInstance(srUuid, session) 

3596 if not sr.gcEnabled(False):

3597 return 

3598 

3599 sr.cleanupCache() 

3600 try: 

3601 _gcLoop(sr, dryRun, immediate=immediate) 

3602 finally: 

3603 sr.cleanup() 

3604 sr.logFilter.logState() 

3605 del sr.xapi 

3606 

3607 

3608def _abort(srUuid, soft=False): 

3609 """Aborts an GC/coalesce. 

3610 

3611 srUuid: the UUID of the SR whose GC/coalesce must be aborted 

3612 soft: If set to True and there is a pending abort signal, the function 

3613 doesn't do anything. If set to False, a new abort signal is issued. 

3614 

3615 returns: On success we return True, holding lockActive. If soft is set to

3616 True and an abort signal is already pending, we return False without

3617 holding lockActive. An exception is raised in case of error."""

3618 Util.log("=== SR %s: abort ===" % (srUuid)) 

3619 init(srUuid) 

3620 if not lockActive.acquireNoblock(): 

3621 gotLock = False 

3622 Util.log("Aborting currently-running instance (SR %s)" % srUuid) 

3623 abortFlag = IPCFlag(srUuid) 

3624 if not abortFlag.set(FLAG_TYPE_ABORT, soft): 

3625 return False 

3626 for i in range(SR.LOCK_RETRY_ATTEMPTS): 

3627 gotLock = lockActive.acquireNoblock() 

3628 if gotLock: 

3629 break 

3630 time.sleep(SR.LOCK_RETRY_INTERVAL) 

3631 abortFlag.clear(FLAG_TYPE_ABORT) 

3632 if not gotLock: 

3633 raise util.CommandException(code=errno.ETIMEDOUT, 

3634 reason="SR %s: error aborting existing process" % srUuid) 

3635 return True 

3636 
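A minimal sketch of the caller-side contract described in the docstring above: on success the caller ends up holding lockActive and must release it (abort() further down does exactly this). The SR UUID below is a placeholder.

# Sketch: honouring _abort()'s lock contract (placeholder UUID).
sr_uuid = "00000000-0000-0000-0000-000000000000"
if _abort(sr_uuid, soft=True):
    # We now hold lockActive: any running GC/coalesce has been stopped.
    lockActive.release()
else:
    # The soft abort found an abort signal already pending; lockActive is not held.
    pass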

3637 

3638def init(srUuid): 

3639 global lockRunning 

3640 if not lockRunning:

3641 lockRunning = lock.Lock(LOCK_TYPE_RUNNING, srUuid) 

3642 global lockActive 

3643 if not lockActive:

3644 lockActive = LockActive(srUuid) 

3645 

3646 

3647class LockActive: 

3648 """ 

3649 Wraps the use of LOCK_TYPE_GC_ACTIVE such that the lock cannot be acquired 

3650 if another process holds the SR lock. 

3651 """ 

3652 def __init__(self, srUuid): 

3653 self._lock = lock.Lock(LOCK_TYPE_GC_ACTIVE, srUuid) 

3654 self._srLock = lock.Lock(vhdutil.LOCK_TYPE_SR, srUuid) 

3655 

3656 def acquireNoblock(self): 

3657 self._srLock.acquire() 

3658 

3659 try: 

3660 return self._lock.acquireNoblock() 

3661 finally: 

3662 self._srLock.release() 

3663 

3664 def release(self): 

3665 self._lock.release() 

3666 
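As an illustration of the wrapper above, a sketch of the acquire/release pattern used by _gcLoop() and _abort(); the SR UUID is a placeholder.

# Sketch: per the class docstring, LockActive cannot be acquired while another
# process holds the SR lock, so holding it means GC/coalesce may start safely.
sr_uuid = "00000000-0000-0000-0000-000000000000"  # placeholder
gc_lock = LockActive(sr_uuid)
if gc_lock.acquireNoblock():
    try:
        pass  # GC/coalesce work would go here
    finally:
        gc_lock.release()
else:
    pass  # another GC instance (or an SR-lock holder) got there first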

3667 

3668def usage(): 

3669 output = """Garbage collect and/or coalesce VHDs in a VHD-based SR 

3670 

3671Parameters: 

3672 -u --uuid UUID SR UUID 

3673 and one of: 

3674 -g --gc garbage collect, coalesce, and repeat while there is work 

3675 -G --gc_force garbage collect once, aborting any current operations 

3676 -c --clean_cache <max_age> clean up IntelliCache cache files older than

3677 max_age hours 

3678 -a --abort abort any currently running operation (GC or coalesce) 

3679 -q --query query the current state (GC'ing, coalescing or not running) 

3680 -x --disable disable GC/coalesce (will be in effect until you exit) 

3681 -t --debug see Debug below 

3682 

3683Options: 

3684 -b --background run in background (return immediately) (valid for -g only) 

3685 -f --force continue in the presence of VHDs with errors (when doing 

3686 GC, this might cause removal of any such VHDs) (only valid 

3687 for -G) (DANGEROUS) 

3688 

3689Debug: 

3690 The --debug parameter enables manipulation of LVHD VDIs for debugging 

3691 purposes. ** NEVER USE IT ON A LIVE VM ** 

3692 The following parameters are required: 

3693 -t --debug <cmd> <cmd> is one of "activate", "deactivate", "inflate", 

3694 "deflate". 

3695 -v --vdi_uuid VDI UUID 

3696 """ 

3697 #-d --dry-run don't actually perform any SR-modifying operations 

3698 print(output) 

3699 Util.log("(Invalid usage)") 

3700 sys.exit(1) 

3701 
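For reference, typical invocations of the options listed above, sketched from Python; the script path and SR UUID are placeholders/assumptions, not taken from this file.

# Sketch: driving this script's CLI (assumed path, placeholder UUID).
import subprocess

CLEANUP = "/opt/xensource/sm/cleanup.py"          # assumed install path
SR_UUID = "00000000-0000-0000-0000-000000000000"  # placeholder

# Garbage collect and coalesce in the background for one SR:
subprocess.call([CLEANUP, "-u", SR_UUID, "-g", "-b"])

# Query whether GC/coalesce is currently running:
subprocess.call([CLEANUP, "-u", SR_UUID, "-q"])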

3702 

3703############################################################################## 

3704# 

3705# API 

3706# 

3707def abort(srUuid, soft=False): 

3708 """Abort GC/coalesce if we are currently GC'ing or coalescing a VDI pair. 

3709 """ 

3710 if _abort(srUuid, soft): 

3711 Util.log("abort: releasing the process lock") 

3712 lockActive.release() 

3713 return True 

3714 else: 

3715 return False 

3716 

3717 

3718def gc(session, srUuid, inBackground, dryRun=False): 

3719 """Garbage collect all deleted VDIs in SR "srUuid". Fork & return 

3720 immediately if inBackground=True. 

3721 

3722 The following algorithm is used: 

3723 1. If we are already GC'ing in this SR, return 

3724 2. If we are already coalescing a VDI pair: 

3725 a. Scan the SR and determine if the VDI pair is GC'able 

3726 b. If the pair is not GC'able, return 

3727 c. If the pair is GC'able, abort coalesce 

3728 3. Scan the SR 

3729 4. If there is nothing to collect, nor to coalesce, return 

3730 5. If there is something to collect, GC all, then goto 3 

3731 6. If there is something to coalesce, coalesce one pair, then goto 3 

3732 """ 

3733 Util.log("=== SR %s: gc ===" % srUuid) 

3734 if inBackground: 

3735 if daemonize(): 

3736 # we are now running in the background. Catch & log any errors 

3737 # because there is no other way to propagate them back at this 

3738 # point 

3739 

3740 try: 

3741 _gc(None, srUuid, dryRun) 

3742 except AbortException: 

3743 Util.log("Aborted") 

3744 except Exception: 

3745 Util.logException("gc") 

3746 Util.log("* * * * * SR %s: ERROR\n" % srUuid) 

3747 os._exit(0) 

3748 else: 

3749 _gc(session, srUuid, dryRun, immediate=True) 

3750 
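A minimal sketch of calling the entry point documented above, the same way main() does for the -g flag; the SR UUID is a placeholder.

# Sketch: request background GC/coalesce for one SR (placeholder UUID).
# Passing session=None mirrors the CLI path in main().
sr_uuid = "00000000-0000-0000-0000-000000000000"
gc(None, sr_uuid, inBackground=True)    # forks and returns immediately
# gc(None, sr_uuid, inBackground=False) would run the GC loop in-process.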

3751 

3752def gc_force(session, srUuid, force=False, dryRun=False, lockSR=False): 

3753 """Garbage collect all deleted VDIs in SR "srUuid". The caller must ensure 

3754 the SR lock is held. 

3755 The following algorithm is used: 

3756 1. If we are already GC'ing or coalescing a VDI pair, abort GC/coalesce 

3757 2. Scan the SR 

3758 3. GC 

3759 4. return 

3760 """ 

3761 Util.log("=== SR %s: gc_force ===" % srUuid) 

3762 init(srUuid) 

3763 sr = SR.getInstance(srUuid, session, lockSR, True) 

3764 if not lockActive.acquireNoblock(): 

3765 abort(srUuid) 

3766 else: 

3767 Util.log("Nothing was running, clear to proceed") 

3768 

3769 if force: 

3770 Util.log("FORCED: will continue even if there are VHD errors") 

3771 sr.scanLocked(force) 

3772 sr.cleanupCoalesceJournals() 

3773 

3774 try: 

3775 sr.cleanupCache() 

3776 sr.garbageCollect(dryRun) 

3777 finally: 

3778 sr.cleanup() 

3779 sr.logFilter.logState() 

3780 lockActive.release() 

3781 
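A sketch of a one-shot forced collection matching the description above, invoked the way main() does for -G; the SR UUID is a placeholder and lockSR=True is simply what the CLI path passes.

# Sketch: single-pass forced GC for one SR (placeholder UUID), as used by the
# CLI's -G/--gc_force action.
sr_uuid = "00000000-0000-0000-0000-000000000000"
gc_force(None, sr_uuid, force=False, dryRun=False, lockSR=True)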

3782 

3783def get_state(srUuid): 

3784 """Return whether GC/coalesce is currently running or not. The information 

3785 is not guaranteed for any length of time if the call is not protected by 

3786 locking. 

3787 """ 

3788 init(srUuid) 

3789 if lockActive.acquireNoblock(): 

3790 lockActive.release() 

3791 return False 

3792 return True 

3793 
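A small sketch of polling the state query above from a monitoring script; the SR UUID is a placeholder.

# Sketch: wait until GC/coalesce goes idle on an SR (placeholder UUID).
import time
sr_uuid = "00000000-0000-0000-0000-000000000000"
while get_state(sr_uuid):
    time.sleep(1)
print("GC/coalesce idle for SR %s" % sr_uuid)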

3794 

3795def should_preempt(session, srUuid): 

3796 sr = SR.getInstance(srUuid, session) 

3797 entries = sr.journaler.getAll(VDI.JRN_COALESCE) 

3798 if len(entries) == 0: 

3799 return False 

3800 elif len(entries) > 1: 

3801 raise util.SMException("More than one coalesce entry: " + str(entries)) 

3802 sr.scanLocked() 

3803 coalescedUuid = entries.popitem()[0] 

3804 garbage = sr.findGarbage() 

3805 for vdi in garbage: 

3806 if vdi.uuid == coalescedUuid: 

3807 return True 

3808 return False 

3809 

3810 

3811def get_coalesceable_leaves(session, srUuid, vdiUuids): 

3812 coalesceable = [] 

3813 sr = SR.getInstance(srUuid, session) 

3814 sr.scanLocked() 

3815 for uuid in vdiUuids: 

3816 vdi = sr.getVDI(uuid) 

3817 if not vdi: 

3818 raise util.SMException("VDI %s not found" % uuid) 

3819 if vdi.isLeafCoalesceable(): 

3820 coalesceable.append(uuid) 

3821 return coalesceable 

3822 

3823 

3824def cache_cleanup(session, srUuid, maxAge): 

3825 sr = SR.getInstance(srUuid, session) 

3826 return sr.cleanupCache(maxAge) 

3827 

3828 

3829def debug(sr_uuid, cmd, vdi_uuid): 

3830 Util.log("Debug command: %s" % cmd) 

3831 sr = SR.getInstance(sr_uuid, None) 

3832 if not isinstance(sr, LVHDSR): 

3833 print("Error: not an LVHD SR") 

3834 return 

3835 sr.scanLocked() 

3836 vdi = sr.getVDI(vdi_uuid) 

3837 if not vdi: 

3838 print("Error: VDI %s not found") 

3839 return 

3840 print("Running %s on SR %s" % (cmd, sr)) 

3841 print("VDI before: %s" % vdi) 

3842 if cmd == "activate": 

3843 vdi._activate() 

3844 print("VDI file: %s" % vdi.path) 

3845 if cmd == "deactivate": 

3846 ns = lvhdutil.NS_PREFIX_LVM + sr.uuid 

3847 sr.lvmCache.deactivate(ns, vdi.uuid, vdi.fileName, False) 

3848 if cmd == "inflate": 

3849 vdi.inflateFully() 

3850 sr.cleanup() 

3851 if cmd == "deflate": 

3852 vdi.deflate() 

3853 sr.cleanup() 

3854 sr.scanLocked() 

3855 print("VDI after: %s" % vdi) 

3856 

3857 

3858def abort_optional_reenable(uuid): 

3859 print("Disabling GC/coalesce for %s" % uuid) 

3860 ret = _abort(uuid) 

3861 input("Press enter to re-enable...") 

3862 print("GC/coalesce re-enabled") 

3863 lockRunning.release() 

3864 if ret: 

3865 lockActive.release() 

3866 

3867 

3868############################################################################## 

3869# 

3870# CLI 

3871# 

3872def main(): 

3873 action = "" 

3874 uuid = "" 

3875 background = False 

3876 force = False 

3877 dryRun = False 

3878 debug_cmd = "" 

3879 vdi_uuid = "" 

3880 shortArgs = "gGc:aqxu:bfdt:v:" 

3881 longArgs = ["gc", "gc_force", "clean_cache", "abort", "query", "disable", 

3882 "uuid=", "background", "force", "dry-run", "debug=", "vdi_uuid="] 

3883 

3884 try: 

3885 opts, args = getopt.getopt(sys.argv[1:], shortArgs, longArgs) 

3886 except getopt.GetoptError: 

3887 usage() 

3888 for o, a in opts: 

3889 if o in ("-g", "--gc"): 

3890 action = "gc" 

3891 if o in ("-G", "--gc_force"): 

3892 action = "gc_force" 

3893 if o in ("-c", "--clean_cache"): 

3894 action = "clean_cache" 

3895 maxAge = int(a) 

3896 if o in ("-a", "--abort"): 

3897 action = "abort" 

3898 if o in ("-q", "--query"): 

3899 action = "query" 

3900 if o in ("-x", "--disable"): 

3901 action = "disable" 

3902 if o in ("-u", "--uuid"): 

3903 uuid = a 

3904 if o in ("-b", "--background"): 

3905 background = True 

3906 if o in ("-f", "--force"): 

3907 force = True 

3908 if o in ("-d", "--dry-run"): 

3909 Util.log("Dry run mode") 

3910 dryRun = True 

3911 if o in ("-t", "--debug"): 

3912 action = "debug" 

3913 debug_cmd = a 

3914 if o in ("-v", "--vdi_uuid"): 

3915 vdi_uuid = a 

3916 

3917 if not action or not uuid: 

3918 usage() 

3919 if (action == "debug" and not (debug_cmd and vdi_uuid)) or \

3920         (action != "debug" and (debug_cmd or vdi_uuid)):

3921 usage() 

3922 

3923 if action != "query" and action != "debug": 

3924 print("All output goes to log") 

3925 

3926 if action == "gc": 

3927 gc(None, uuid, background, dryRun) 

3928 elif action == "gc_force": 

3929 gc_force(None, uuid, force, dryRun, True) 

3930 elif action == "clean_cache": 

3931 cache_cleanup(None, uuid, maxAge) 

3932 elif action == "abort": 

3933 abort(uuid) 

3934 elif action == "query": 

3935 print("Currently running: %s" % get_state(uuid)) 

3936 elif action == "disable": 

3937 abort_optional_reenable(uuid) 

3938 elif action == "debug": 

3939 debug(uuid, debug_cmd, vdi_uuid) 

3940 

3941 

3942if __name__ == '__main__':

3943 main()