From d3137fdc7306fa6a763263b4e1452449e3b48206 Mon Sep 17 00:00:00 2001 From: J M Date: Fri, 3 Apr 2026 15:42:58 +0800 Subject: [PATCH 1/6] [storage]: exclude deleted volumes from recalculate and sync physical capacity on reconnect Resolves: ZSTAC-80937 Change-Id: I5380a3cb223c675c20418831fffe5c107089b5ea --- .../org/zstack/storage/primary/PrimaryStorageBase.java | 8 ++++---- .../primary/PrimaryStorageCapacityRecalculator.java | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/storage/src/main/java/org/zstack/storage/primary/PrimaryStorageBase.java b/storage/src/main/java/org/zstack/storage/primary/PrimaryStorageBase.java index 42bc1a5a57f..8a90b849d82 100755 --- a/storage/src/main/java/org/zstack/storage/primary/PrimaryStorageBase.java +++ b/storage/src/main/java/org/zstack/storage/primary/PrimaryStorageBase.java @@ -655,10 +655,10 @@ public void success() { changeStatus(PrimaryStorageStatus.Connected); logger.debug(String.format("successfully connected primary storage[uuid:%s]", self.getUuid())); - RecalculatePrimaryStorageCapacityMsg rmsg = new RecalculatePrimaryStorageCapacityMsg(); - rmsg.setPrimaryStorageUuid(self.getUuid()); - bus.makeLocalServiceId(rmsg, PrimaryStorageConstant.SERVICE_ID); - bus.send(rmsg); + SyncPrimaryStorageCapacityMsg smsg = new SyncPrimaryStorageCapacityMsg(); + smsg.setPrimaryStorageUuid(self.getUuid()); + bus.makeLocalServiceId(smsg, PrimaryStorageConstant.SERVICE_ID); + bus.send(smsg); tracker.track(self.getUuid()); diff --git a/storage/src/main/java/org/zstack/storage/primary/PrimaryStorageCapacityRecalculator.java b/storage/src/main/java/org/zstack/storage/primary/PrimaryStorageCapacityRecalculator.java index d4b295ed559..70bb46bc2c2 100644 --- a/storage/src/main/java/org/zstack/storage/primary/PrimaryStorageCapacityRecalculator.java +++ b/storage/src/main/java/org/zstack/storage/primary/PrimaryStorageCapacityRecalculator.java @@ -75,7 +75,7 @@ public void run() { " group by vol.primaryStorageUuid"; TypedQuery q = 
dbf.getEntityManager().createQuery(sql, Tuple.class); q.setParameter("psUuids", psUuids); - List needCountVolumeStates = asList(VolumeStatus.Creating, VolumeStatus.Ready, VolumeStatus.Deleted); + List needCountVolumeStates = asList(VolumeStatus.Creating, VolumeStatus.Ready); q.setParameter("volStatus", needCountVolumeStates); List ts = q.getResultList(); From c43fbb34e14a243606131679354ceb89ecf68525 Mon Sep 17 00:00:00 2001 From: J M Date: Thu, 19 Mar 2026 19:06:14 +0800 Subject: [PATCH 2/6] [storage]: prevent Spring Bean mutation in SelectBackupStorageMsg handler Resolves: ZSTAC-80789 Change-Id: Ic9d89a477688248e24ba9099e8ca198cb04c9d92 --- storage/pom.xml | 5 + .../addon/primary/ExternalPrimaryStorage.java | 3 +- .../ExternalPrimaryStorageSelectBsTest.java | 99 +++++++++++++++++++ 3 files changed, 106 insertions(+), 1 deletion(-) create mode 100644 storage/src/test/java/org/zstack/storage/addon/primary/ExternalPrimaryStorageSelectBsTest.java diff --git a/storage/pom.xml b/storage/pom.xml index c45fa73c3c7..1f7deed4747 100755 --- a/storage/pom.xml +++ b/storage/pom.xml @@ -54,6 +54,11 @@ longjob ${project.version} + + junit + junit + test + diff --git a/storage/src/main/java/org/zstack/storage/addon/primary/ExternalPrimaryStorage.java b/storage/src/main/java/org/zstack/storage/addon/primary/ExternalPrimaryStorage.java index 837dfa74ce5..a769a508198 100644 --- a/storage/src/main/java/org/zstack/storage/addon/primary/ExternalPrimaryStorage.java +++ b/storage/src/main/java/org/zstack/storage/addon/primary/ExternalPrimaryStorage.java @@ -55,6 +55,7 @@ import org.zstack.utils.gson.JSONObjectUtil; import org.zstack.utils.logging.CLogger; +import java.util.ArrayList; import java.util.Comparator; import java.util.LinkedHashMap; import java.util.List; @@ -409,7 +410,7 @@ private void handle(final SelectBackupStorageMsg msg) { BackupStorageSelector selector = pluginRgty.getExtensionFromMap(externalVO.getIdentity(), BackupStorageSelector.class); - List preferBsTypes = 
selector.getPreferBackupStorageTypes(); + List preferBsTypes = new ArrayList<>(selector.getPreferBackupStorageTypes()); if (!CollectionUtils.isEmpty(msg.getRequiredBackupStorageTypes())) { preferBsTypes.retainAll(msg.getRequiredBackupStorageTypes()); } diff --git a/storage/src/test/java/org/zstack/storage/addon/primary/ExternalPrimaryStorageSelectBsTest.java b/storage/src/test/java/org/zstack/storage/addon/primary/ExternalPrimaryStorageSelectBsTest.java new file mode 100644 index 00000000000..eeea28091ac --- /dev/null +++ b/storage/src/test/java/org/zstack/storage/addon/primary/ExternalPrimaryStorageSelectBsTest.java @@ -0,0 +1,99 @@ +package org.zstack.storage.addon.primary; + +import org.junit.Assert; +import org.junit.Test; +import org.zstack.header.storage.addon.primary.BackupStorageSelector; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * Regression test for ZSTAC-80789. + * + * BackupStorageSelector implementations (e.g. XInfiniStorageFactory, ExponStorageFactory) + * are Spring beans whose getPreferBackupStorageTypes() returns a direct reference to an + * internal field. If the caller mutates that list (e.g. via retainAll), the bean's state + * is permanently corrupted, causing all subsequent SelectBackupStorageMsg handling to fail. + * + * The fix in ExternalPrimaryStorage wraps the returned list with {@code new ArrayList<>(...)}. + * This test verifies that pattern. + */ +public class ExternalPrimaryStorageSelectBsTest { + + /** + * Simulates a BackupStorageSelector that returns its internal field directly, + * just like XInfiniStorageFactory and ExponStorageFactory do. 
+ */ + private static class FakeBackupStorageSelector implements BackupStorageSelector { + private List preferBackupStorageTypes; + + FakeBackupStorageSelector(List types) { + this.preferBackupStorageTypes = types; + } + + @Override + public List getPreferBackupStorageTypes() { + return preferBackupStorageTypes; + } + + @Override + public String getIdentity() { + return "fake"; + } + } + + @Test + public void testDefensiveCopyPreventsBeanMutation() { + // Setup: simulate a Spring bean with mutable internal state + List beanInternalList = new ArrayList<>(Arrays.asList("ImageStore", "Ceph", "FusionStor")); + FakeBackupStorageSelector selector = new FakeBackupStorageSelector(beanInternalList); + + List originalTypes = new ArrayList<>(beanInternalList); + + // First call: simulate handle(SelectBackupStorageMsg) with requiredBackupStorageTypes = ["ImageStore"] + // The fix: defensive copy before retainAll + List preferBsTypes = new ArrayList<>(selector.getPreferBackupStorageTypes()); + List requiredTypes = Arrays.asList("ImageStore"); + preferBsTypes.retainAll(requiredTypes); + + // After retainAll, the working copy should be filtered + Assert.assertEquals(Arrays.asList("ImageStore"), preferBsTypes); + + // The bean's internal state must remain unchanged + Assert.assertEquals(originalTypes, selector.getPreferBackupStorageTypes()); + Assert.assertEquals(3, selector.getPreferBackupStorageTypes().size()); + + // Second call: simulate another handle(SelectBackupStorageMsg) with requiredBackupStorageTypes = ["Ceph"] + List preferBsTypes2 = new ArrayList<>(selector.getPreferBackupStorageTypes()); + List requiredTypes2 = Arrays.asList("Ceph"); + preferBsTypes2.retainAll(requiredTypes2); + + Assert.assertEquals(Arrays.asList("Ceph"), preferBsTypes2); + + // Bean's internal state still unchanged after two calls + Assert.assertEquals(originalTypes, selector.getPreferBackupStorageTypes()); + } + + @Test + public void testWithoutDefensiveCopyBeanIsMutated() { + // This test 
demonstrates the original bug: without defensive copy, retainAll mutates the bean + List beanInternalList = new ArrayList<>(Arrays.asList("ImageStore", "Ceph", "FusionStor")); + FakeBackupStorageSelector selector = new FakeBackupStorageSelector(beanInternalList); + + // Without defensive copy (the old buggy code path) + List preferBsTypes = selector.getPreferBackupStorageTypes(); + preferBsTypes.retainAll(Arrays.asList("ImageStore")); + + // The bean's internal list is now corrupted + Assert.assertEquals(1, selector.getPreferBackupStorageTypes().size()); + Assert.assertEquals(Arrays.asList("ImageStore"), selector.getPreferBackupStorageTypes()); + + // Second call with "Ceph" now fails because the bean only has "ImageStore" left + List preferBsTypes2 = selector.getPreferBackupStorageTypes(); + preferBsTypes2.retainAll(Arrays.asList("Ceph")); + + // The list is now empty - this is the bug that caused clone VM to fail + Assert.assertTrue(selector.getPreferBackupStorageTypes().isEmpty()); + } +} From 8a71c3889c54c714eef65377693a1905b4c70dce Mon Sep 17 00:00:00 2001 From: J M Date: Wed, 15 Apr 2026 18:27:56 +0800 Subject: [PATCH 3/6] [storage]: adjust PS availableCapacity when volume size changes during syncVolumeSize Resolves: ZSTAC-80937 Change-Id: Idb146c176e50dd3748ae408c0ae1dab5a999935c --- .../org/zstack/storage/volume/VolumeBase.java | 22 +- .../NfsSyncVolumeSizeCapacityCase.groovy | 272 ++++++++++++++++++ 2 files changed, 293 insertions(+), 1 deletion(-) create mode 100644 test/src/test/groovy/org/zstack/test/integration/storage/primary/nfs/capacity/NfsSyncVolumeSizeCapacityCase.groovy diff --git a/storage/src/main/java/org/zstack/storage/volume/VolumeBase.java b/storage/src/main/java/org/zstack/storage/volume/VolumeBase.java index 390e79c54fd..6224a67c569 100755 --- a/storage/src/main/java/org/zstack/storage/volume/VolumeBase.java +++ b/storage/src/main/java/org/zstack/storage/volume/VolumeBase.java @@ -2169,6 +2169,7 @@ private void 
getPrimaryStorageCapacities(Map ret) { private void syncVolumeVolumeSize(final ReturnValueCompletion completion) { refreshVO(); + long oldSize = self.getSize(); SyncVolumeSizeOnPrimaryStorageMsg smsg = new SyncVolumeSizeOnPrimaryStorageMsg(); smsg.setPrimaryStorageUuid(self.getPrimaryStorageUuid()); smsg.setVolumeUuid(self.getUuid()); @@ -2184,7 +2185,8 @@ public void run(MessageReply reply) { refreshVO(); SyncVolumeSizeOnPrimaryStorageReply r = reply.castReply(); - self.setSize(r.getSize()); + long newSize = r.getSize(); + self.setSize(newSize); if (!r.isWithInternalSnapshot()) { // the actual size = volume actual size + all snapshot size @@ -2196,6 +2198,24 @@ public void run(MessageReply reply) { self = dbf.updateAndRefresh(self); + // adjust primary storage available capacity when volume size changes + if (self.getPrimaryStorageUuid() != null && newSize != oldSize) { + long sizeDiff = newSize - oldSize; + if (sizeDiff > 0) { + DecreasePrimaryStorageCapacityMsg dmsg = new DecreasePrimaryStorageCapacityMsg(); + dmsg.setPrimaryStorageUuid(self.getPrimaryStorageUuid()); + dmsg.setDiskSize(sizeDiff); + bus.makeTargetServiceIdByResourceUuid(dmsg, PrimaryStorageConstant.SERVICE_ID, self.getPrimaryStorageUuid()); + bus.send(dmsg); + } else { + IncreasePrimaryStorageCapacityMsg imsg = new IncreasePrimaryStorageCapacityMsg(); + imsg.setPrimaryStorageUuid(self.getPrimaryStorageUuid()); + imsg.setDiskSize(Math.abs(sizeDiff)); + bus.makeTargetServiceIdByResourceUuid(imsg, PrimaryStorageConstant.SERVICE_ID, self.getPrimaryStorageUuid()); + bus.send(imsg); + } + } + VolumeSize size = new VolumeSize(); size.actualSize = self.getActualSize(); size.size = self.getSize(); diff --git a/test/src/test/groovy/org/zstack/test/integration/storage/primary/nfs/capacity/NfsSyncVolumeSizeCapacityCase.groovy b/test/src/test/groovy/org/zstack/test/integration/storage/primary/nfs/capacity/NfsSyncVolumeSizeCapacityCase.groovy new file mode 100644 index 00000000000..51142925c2b --- /dev/null 
+++ b/test/src/test/groovy/org/zstack/test/integration/storage/primary/nfs/capacity/NfsSyncVolumeSizeCapacityCase.groovy @@ -0,0 +1,272 @@ +package org.zstack.test.integration.storage.primary.nfs.capacity + +import org.springframework.http.HttpEntity +import org.zstack.core.db.DatabaseFacade +import org.zstack.core.db.SQL +import org.zstack.header.storage.primary.PrimaryStorageCapacityVO +import org.zstack.header.volume.VolumeVO +import org.zstack.header.volume.VolumeVO_ +import org.zstack.sdk.GetPrimaryStorageCapacityResult +import org.zstack.sdk.PrimaryStorageInventory +import org.zstack.sdk.VmInstanceInventory +import org.zstack.test.integration.storage.StorageTest +import org.zstack.testlib.EnvSpec +import org.zstack.testlib.SubCase +import org.zstack.testlib.NfsPrimaryStorageSpec +import org.zstack.storage.primary.nfs.NfsPrimaryStorageKVMBackend +import org.zstack.storage.primary.nfs.NfsPrimaryStorageKVMBackendCommands +import org.zstack.utils.data.SizeUnit +import org.zstack.utils.gson.JSONObjectUtil +import org.zstack.header.image.ImageConstant +import org.zstack.header.network.service.NetworkServiceType +import org.zstack.network.securitygroup.SecurityGroupConstant +import org.zstack.network.service.virtualrouter.VirtualRouterConstant + +/** + * ZSTAC-80937: syncVolumeSize updates VolumeVO.size without adjusting + * PrimaryStorage availableCapacity, causing allocation rate drift. + * + * Test strategy: mock GET_VOLUME_SIZE_PATH to return a larger size, + * call APISyncVolumeSizeMsg, verify that availableCapacity is adjusted. 
+ */ +class NfsSyncVolumeSizeCapacityCase extends SubCase { + EnvSpec env + DatabaseFacade dbf + + @Override + void clean() { + env.delete() + } + + @Override + void setup() { + useSpring(StorageTest.springSpec) + } + + @Override + void environment() { + env = env { + instanceOffering { + name = "instanceOffering" + memory = SizeUnit.GIGABYTE.toByte(8) + cpu = 4 + } + + diskOffering { + name = "diskOffering" + diskSize = SizeUnit.GIGABYTE.toByte(20) + } + + sftpBackupStorage { + name = "sftp" + url = "/sftp" + username = "root" + password = "password" + hostname = "localhost" + + image { + name = "image1" + url = "http://zstack.org/download/test.qcow2" + } + + image { + name = "vr" + url = "http://zstack.org/download/vr.qcow2" + } + } + + zone { + name = "zone" + description = "test" + + cluster { + name = "cluster" + hypervisorType = "KVM" + + kvm { + name = "kvm" + managementIp = "localhost" + username = "root" + password = "password" + } + + attachPrimaryStorage("nfs") + attachL2Network("l2") + } + + nfsPrimaryStorage { + name = "nfs" + url = "localhost:/nfs" + } + + l2NoVlanNetwork { + name = "l2" + physicalInterface = "eth0" + + l3Network { + name = "l3" + + service { + provider = VirtualRouterConstant.PROVIDER_TYPE + types = [NetworkServiceType.DHCP.toString(), NetworkServiceType.DNS.toString()] + } + + service { + provider = SecurityGroupConstant.SECURITY_GROUP_PROVIDER_TYPE + types = [SecurityGroupConstant.SECURITY_GROUP_NETWORK_SERVICE_TYPE] + } + + ip { + startIp = "192.168.100.10" + endIp = "192.168.100.100" + netmask = "255.255.255.0" + gateway = "192.168.100.1" + } + } + + l3Network { + name = "pubL3" + + ip { + startIp = "12.16.10.10" + endIp = "12.16.10.100" + netmask = "255.255.255.0" + gateway = "12.16.10.1" + } + } + } + + virtualRouterOffering { + name = "vr" + memory = SizeUnit.MEGABYTE.toByte(512) + cpu = 2 + useManagementL3Network("pubL3") + usePublicL3Network("pubL3") + useImage("vr") + } + + attachBackupStorage("sftp") + } + + vm { + name = 
"vm" + useInstanceOffering("instanceOffering") + useImage("image1") + useL3Networks("l3") + useRootDiskOffering("diskOffering") + useHost("kvm") + } + } + } + + @Override + void test() { + env.create { + dbf = bean(DatabaseFacade.class) + testSyncVolumeSizeAdjustsCapacity() + } + } + + /** + * Core test: when APISyncVolumeSizeMsg returns a larger size, + * the primary storage availableCapacity should decrease accordingly. + * + * Before fix: availableCapacity stayed the same after sync + * (causing drift between incremental tracking and recalculation). + * After fix: availableCapacity is properly adjusted. + */ + void testSyncVolumeSizeAdjustsCapacity() { + PrimaryStorageInventory ps = env.inventoryByName("nfs") + VmInstanceInventory vm = env.inventoryByName("vm") + String rootVolumeUuid = vm.rootVolumeUuid + + // Ensure the volume has a known non-zero size in DB + long initialSize = SizeUnit.GIGABYTE.toByte(20) + SQL.New(VolumeVO.class) + .eq(VolumeVO_.uuid, rootVolumeUuid) + .set(VolumeVO_.size, initialSize) + .update() + + long sizeIncrease = SizeUnit.GIGABYTE.toByte(10) + long newSize = initialSize + sizeIncrease + + // Mock the NFS agent to report the volume has grown + env.simulator(NfsPrimaryStorageKVMBackend.GET_VOLUME_SIZE_PATH) { HttpEntity e, EnvSpec espec -> + def rsp = new NfsPrimaryStorageKVMBackendCommands.GetVolumeActualSizeRsp() + rsp.size = newSize + rsp.actualSize = newSize + return rsp + } + + // Mock remount to avoid physical capacity changes during reconnect + env.simulator(NfsPrimaryStorageKVMBackend.REMOUNT_PATH) { HttpEntity e, EnvSpec espec -> + def cmd = JSONObjectUtil.toObject(e.getBody(), NfsPrimaryStorageKVMBackendCommands.RemountCmd.class) + NfsPrimaryStorageSpec spec = espec.specByUuid(cmd.uuid) as NfsPrimaryStorageSpec + def rsp = new NfsPrimaryStorageKVMBackendCommands.NfsPrimaryStorageAgentResponse() + rsp.totalCapacity = spec.totalCapacity + rsp.availableCapacity = spec.availableCapacity + return rsp + } + + // Step 1: Baseline — 
reconnect to establish consistent capacity + reconnectPrimaryStorage { + uuid = ps.uuid + } + + GetPrimaryStorageCapacityResult baseline = getPrimaryStorageCapacity { + primaryStorageUuids = [ps.uuid] + } + + // Step 2: Trigger volume size sync — agent reports larger size + syncVolumeSize { + uuid = rootVolumeUuid + } + + // Verify volume size was updated in DB + VolumeVO vol = dbf.findByUuid(rootVolumeUuid, VolumeVO.class) + logger.info("after syncVolumeSize: vol.size=${vol.getSize()}, expected=${newSize}") + + GetPrimaryStorageCapacityResult afterSync = getPrimaryStorageCapacity { + primaryStorageUuids = [ps.uuid] + } + + logger.info("capacity: baseline.available=${baseline.availableCapacity}, afterSync.available=${afterSync.availableCapacity}") + + // Core assertion: if the volume size increased, availableCapacity must decrease + // Before fix: afterSync.availableCapacity == baseline.availableCapacity (no adjustment) + // After fix: afterSync.availableCapacity < baseline.availableCapacity + if (vol.getSize() == newSize) { + // Sync worked — verify capacity adjustment + assert afterSync.availableCapacity < baseline.availableCapacity : + "ZSTAC-80937: availableCapacity should decrease when volume size increases via sync. 
" + + "baseline=${baseline.availableCapacity}, afterSync=${afterSync.availableCapacity}, " + + "sizeIncrease=${sizeIncrease}" + } else { + // Sync mock didn't work — fall back to recalculate-based verification + logger.info("sync mock did not take effect, verifying via recalculate path") + + // Directly update vol size in DB (simulating backend growth) + SQL.New(VolumeVO.class) + .eq(VolumeVO_.uuid, rootVolumeUuid) + .set(VolumeVO_.size, newSize) + .update() + + // Reconnect triggers recalculate which reads actual vol.size from DB + reconnectPrimaryStorage { + uuid = ps.uuid + } + + GetPrimaryStorageCapacityResult afterRecalc = getPrimaryStorageCapacity { + primaryStorageUuids = [ps.uuid] + } + + // After recalculation, capacity should reflect the new larger volume size + assert afterRecalc.availableCapacity < baseline.availableCapacity : + "ZSTAC-80937: availableCapacity should decrease after recalculate catches volume growth. " + + "baseline=${baseline.availableCapacity}, afterRecalc=${afterRecalc.availableCapacity}, " + + "sizeIncrease=${sizeIncrease}" + + logger.info("VERIFIED via recalculate: capacity dropped from ${baseline.availableCapacity} to ${afterRecalc.availableCapacity}") + } + } +} From 7da801e251458c789affb512a0737711a543c915 Mon Sep 17 00:00:00 2001 From: J M Date: Wed, 15 Apr 2026 19:06:10 +0800 Subject: [PATCH 4/6] [test]: fix storage Resolves: ZSTAC-80937 Change-Id: Ie3a540bd1091e143c62dc4a3cc0c9557cc27e078 --- .../NfsSyncVolumeSizeCapacityCase.groovy | 116 ++++++------------ 1 file changed, 37 insertions(+), 79 deletions(-) diff --git a/test/src/test/groovy/org/zstack/test/integration/storage/primary/nfs/capacity/NfsSyncVolumeSizeCapacityCase.groovy b/test/src/test/groovy/org/zstack/test/integration/storage/primary/nfs/capacity/NfsSyncVolumeSizeCapacityCase.groovy index 51142925c2b..60e3d9dbb3f 100644 --- a/test/src/test/groovy/org/zstack/test/integration/storage/primary/nfs/capacity/NfsSyncVolumeSizeCapacityCase.groovy +++ 
b/test/src/test/groovy/org/zstack/test/integration/storage/primary/nfs/capacity/NfsSyncVolumeSizeCapacityCase.groovy @@ -1,23 +1,16 @@ package org.zstack.test.integration.storage.primary.nfs.capacity -import org.springframework.http.HttpEntity import org.zstack.core.db.DatabaseFacade -import org.zstack.core.db.SQL import org.zstack.header.storage.primary.PrimaryStorageCapacityVO import org.zstack.header.volume.VolumeVO -import org.zstack.header.volume.VolumeVO_ import org.zstack.sdk.GetPrimaryStorageCapacityResult import org.zstack.sdk.PrimaryStorageInventory import org.zstack.sdk.VmInstanceInventory import org.zstack.test.integration.storage.StorageTest import org.zstack.testlib.EnvSpec import org.zstack.testlib.SubCase -import org.zstack.testlib.NfsPrimaryStorageSpec import org.zstack.storage.primary.nfs.NfsPrimaryStorageKVMBackend -import org.zstack.storage.primary.nfs.NfsPrimaryStorageKVMBackendCommands import org.zstack.utils.data.SizeUnit -import org.zstack.utils.gson.JSONObjectUtil -import org.zstack.header.image.ImageConstant import org.zstack.header.network.service.NetworkServiceType import org.zstack.network.securitygroup.SecurityGroupConstant import org.zstack.network.service.virtualrouter.VirtualRouterConstant @@ -180,93 +173,58 @@ class NfsSyncVolumeSizeCapacityCase extends SubCase { VmInstanceInventory vm = env.inventoryByName("vm") String rootVolumeUuid = vm.rootVolumeUuid - // Ensure the volume has a known non-zero size in DB - long initialSize = SizeUnit.GIGABYTE.toByte(20) - SQL.New(VolumeVO.class) - .eq(VolumeVO_.uuid, rootVolumeUuid) - .set(VolumeVO_.size, initialSize) - .update() - long sizeIncrease = SizeUnit.GIGABYTE.toByte(10) - long newSize = initialSize + sizeIncrease - - // Mock the NFS agent to report the volume has grown - env.simulator(NfsPrimaryStorageKVMBackend.GET_VOLUME_SIZE_PATH) { HttpEntity e, EnvSpec espec -> - def rsp = new NfsPrimaryStorageKVMBackendCommands.GetVolumeActualSizeRsp() - rsp.size = newSize - rsp.actualSize = 
newSize - return rsp - } - - // Mock remount to avoid physical capacity changes during reconnect - env.simulator(NfsPrimaryStorageKVMBackend.REMOUNT_PATH) { HttpEntity e, EnvSpec espec -> - def cmd = JSONObjectUtil.toObject(e.getBody(), NfsPrimaryStorageKVMBackendCommands.RemountCmd.class) - NfsPrimaryStorageSpec spec = espec.specByUuid(cmd.uuid) as NfsPrimaryStorageSpec - def rsp = new NfsPrimaryStorageKVMBackendCommands.NfsPrimaryStorageAgentResponse() - rsp.totalCapacity = spec.totalCapacity - rsp.availableCapacity = spec.availableCapacity - return rsp - } // Step 1: Baseline — reconnect to establish consistent capacity reconnectPrimaryStorage { uuid = ps.uuid } + VolumeVO volBefore = dbf.findByUuid(rootVolumeUuid, VolumeVO.class) + long initialSize = volBefore.getSize() + long newSize = initialSize + sizeIncrease + GetPrimaryStorageCapacityResult baseline = getPrimaryStorageCapacity { primaryStorageUuids = [ps.uuid] } - // Step 2: Trigger volume size sync — agent reports larger size + PrimaryStorageCapacityVO capBefore = dbf.findByUuid(ps.uuid, PrimaryStorageCapacityVO.class) + logger.info("BASELINE: vol.size=${initialSize}, ps.available=${capBefore.availableCapacity}") + + // Step 2: hijack the NFS agent response to report larger volume size + env.hijackSimulator(NfsPrimaryStorageKVMBackend.GET_VOLUME_SIZE_PATH) { rsp -> + rsp.size = newSize + rsp.actualSize = newSize + return rsp + } + + // Step 3: Trigger volume size sync — agent reports larger size syncVolumeSize { uuid = rootVolumeUuid } // Verify volume size was updated in DB - VolumeVO vol = dbf.findByUuid(rootVolumeUuid, VolumeVO.class) - logger.info("after syncVolumeSize: vol.size=${vol.getSize()}, expected=${newSize}") - - GetPrimaryStorageCapacityResult afterSync = getPrimaryStorageCapacity { - primaryStorageUuids = [ps.uuid] - } - - logger.info("capacity: baseline.available=${baseline.availableCapacity}, afterSync.available=${afterSync.availableCapacity}") - - // Core assertion: if the volume size 
increased, availableCapacity must decrease - // Before fix: afterSync.availableCapacity == baseline.availableCapacity (no adjustment) - // After fix: afterSync.availableCapacity < baseline.availableCapacity - if (vol.getSize() == newSize) { - // Sync worked — verify capacity adjustment - assert afterSync.availableCapacity < baseline.availableCapacity : - "ZSTAC-80937: availableCapacity should decrease when volume size increases via sync. " + - "baseline=${baseline.availableCapacity}, afterSync=${afterSync.availableCapacity}, " + - "sizeIncrease=${sizeIncrease}" - } else { - // Sync mock didn't work — fall back to recalculate-based verification - logger.info("sync mock did not take effect, verifying via recalculate path") - - // Directly update vol size in DB (simulating backend growth) - SQL.New(VolumeVO.class) - .eq(VolumeVO_.uuid, rootVolumeUuid) - .set(VolumeVO_.size, newSize) - .update() - - // Reconnect triggers recalculate which reads actual vol.size from DB - reconnectPrimaryStorage { - uuid = ps.uuid - } - - GetPrimaryStorageCapacityResult afterRecalc = getPrimaryStorageCapacity { - primaryStorageUuids = [ps.uuid] - } - - // After recalculation, capacity should reflect the new larger volume size - assert afterRecalc.availableCapacity < baseline.availableCapacity : - "ZSTAC-80937: availableCapacity should decrease after recalculate catches volume growth. " + - "baseline=${baseline.availableCapacity}, afterRecalc=${afterRecalc.availableCapacity}, " + - "sizeIncrease=${sizeIncrease}" - - logger.info("VERIFIED via recalculate: capacity dropped from ${baseline.availableCapacity} to ${afterRecalc.availableCapacity}") - } + VolumeVO volAfter = dbf.findByUuid(rootVolumeUuid, VolumeVO.class) + assert volAfter.getSize() == newSize : + "syncVolumeSize should update VolumeVO.size to the value from agent. 
" + + "expected=${newSize}, actual=${volAfter.getSize()}" + + PrimaryStorageCapacityVO capAfter = dbf.findByUuid(ps.uuid, PrimaryStorageCapacityVO.class) + logger.info("AFTER SYNC: vol.size=${volAfter.getSize()}, ps.available=${capAfter.availableCapacity}") + logger.info("DIFF: sizeIncrease=${sizeIncrease}, capacityDrop=${capBefore.availableCapacity - capAfter.availableCapacity}") + + // Core assertion: availableCapacity must decrease by the size increase + // Before fix: capAfter.availableCapacity == capBefore.availableCapacity (no adjustment) + // After fix: capAfter.availableCapacity == capBefore.availableCapacity - sizeIncrease + assert capAfter.availableCapacity < capBefore.availableCapacity : + "ZSTAC-80937: availableCapacity should decrease when volume size increases via syncVolumeSize. " + + "before=${capBefore.availableCapacity}, after=${capAfter.availableCapacity}, " + + "sizeIncrease=${sizeIncrease}" + + assert capAfter.availableCapacity == capBefore.availableCapacity - sizeIncrease : + "ZSTAC-80937: availableCapacity should decrease by exactly the size increase. 
" + + "expected=${capBefore.availableCapacity - sizeIncrease}, actual=${capAfter.availableCapacity}" + + logger.info("VERIFIED: capacity dropped from ${capBefore.availableCapacity} to ${capAfter.availableCapacity} (delta=${sizeIncrease})") } } From 2d721c3ae0765f035193334d37ee0e73e4698309 Mon Sep 17 00:00:00 2001 From: J M Date: Thu, 16 Apr 2026 11:37:21 +0800 Subject: [PATCH 5/6] [storage]: adjust PS capacity on syncVolumeSize Resolves: ZSTAC-80937 Change-Id: I7566666e79706c6c66726f616d706965687a7474 --- .../fix-artifacts/ZSTAC-80937/phase-5.json | 32 +++++++++++++++++++ .../org/zstack/storage/volume/VolumeBase.java | 4 +-- 2 files changed, 34 insertions(+), 2 deletions(-) create mode 100644 docs/cache/fix-artifacts/ZSTAC-80937/phase-5.json diff --git a/docs/cache/fix-artifacts/ZSTAC-80937/phase-5.json b/docs/cache/fix-artifacts/ZSTAC-80937/phase-5.json new file mode 100644 index 00000000000..a8578fd09c0 --- /dev/null +++ b/docs/cache/fix-artifacts/ZSTAC-80937/phase-5.json @@ -0,0 +1,32 @@ +{ + "phase": 5, + "issue": "ZSTAC-80937", + "title": "TDD GREEN - Fix PS availableCapacity drift on syncVolumeSize", + "status": "GREEN", + "data": { + "root_cause": "VolumeBase.syncVolumeVolumeSize() updates VolumeVO.size from the agent-reported value but does not send DecreasePrimaryStorageCapacityMsg/IncreasePrimaryStorageCapacityMsg to adjust the primary storage availableCapacity. Over time, every syncVolumeSize that reports a different size causes availableCapacity to drift from reality, inflating the storage pool allocation rate.", + "bug_location": "storage/src/main/java/org/zstack/storage/volume/VolumeBase.java:syncVolumeVolumeSize()", + "solution": "Record oldSize before the sync RPC. After receiving the reply, compare newSize vs oldSize. If the volume grew, send DecreasePrimaryStorageCapacityMsg with the delta. If it shrank, send IncreasePrimaryStorageCapacityMsg with the absolute delta. 
This keeps PS availableCapacity in sync with actual volume size changes.", + "files_modified": [ + "storage/src/main/java/org/zstack/storage/volume/VolumeBase.java" + ], + "tdd_cycle": { + "phase": "GREEN", + "test_file": "test/src/test/groovy/org/zstack/test/integration/storage/primary/nfs/capacity/NfsSyncVolumeSizeCapacityCase.groovy", + "test_class": "NfsSyncVolumeSizeCapacityCase", + "test_method": "testSyncVolumeSizeAdjustsCapacity", + "expected_result": "PASS - availableCapacity decreases by 10GB after syncVolumeSize reports a larger volume size", + "reproduction_steps": [ + "1. Create a VM with a root volume on NFS primary storage", + "2. Record baseline PS availableCapacity", + "3. Mock NFS GET_VOLUME_SIZE_PATH to return volume size + 10GB", + "4. Call APISyncVolumeSizeMsg on the root volume", + "5. Assert VolumeVO.size is updated (passes)", + "6. Assert PS availableCapacity decreased by 10GB (passes with fix)" + ] + }, + "fix_reverted": false, + "fix_file": "storage/src/main/java/org/zstack/storage/volume/VolumeBase.java", + "fix_description": "After updating VolumeVO.size, compare newSize vs oldSize. If different, send DecreasePrimaryStorageCapacityMsg (size grew) or IncreasePrimaryStorageCapacityMsg (size shrank) to keep PS availableCapacity in sync." 
+ } +} diff --git a/storage/src/main/java/org/zstack/storage/volume/VolumeBase.java b/storage/src/main/java/org/zstack/storage/volume/VolumeBase.java index 6224a67c569..84002c053d4 100755 --- a/storage/src/main/java/org/zstack/storage/volume/VolumeBase.java +++ b/storage/src/main/java/org/zstack/storage/volume/VolumeBase.java @@ -2196,8 +2196,6 @@ public void run(MessageReply reply) { self.setActualSize(r.getActualSize()); } - self = dbf.updateAndRefresh(self); - // adjust primary storage available capacity when volume size changes if (self.getPrimaryStorageUuid() != null && newSize != oldSize) { long sizeDiff = newSize - oldSize; @@ -2216,6 +2214,8 @@ public void run(MessageReply reply) { } } + self = dbf.updateAndRefresh(self); + VolumeSize size = new VolumeSize(); size.actualSize = self.getActualSize(); size.size = self.getSize(); From e1ecdd31dc42af2c7740ddfe5f54ab2a5f5999be Mon Sep 17 00:00:00 2001 From: J M Date: Thu, 16 Apr 2026 11:41:51 +0800 Subject: [PATCH 6/6] [storage]: adjust PS capacity on syncVolumeSize Resolves: ZSTAC-80937 Change-Id: I7566666e79706c6c66726f616d706965687a7474 Co-Authored-By: Claude Opus 4.6 (1M context) --- .../fix-artifacts/ZSTAC-80937/phase-5.json | 4 ++-- .../org/zstack/storage/volume/VolumeBase.java | 19 ++++++++++--------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/docs/cache/fix-artifacts/ZSTAC-80937/phase-5.json b/docs/cache/fix-artifacts/ZSTAC-80937/phase-5.json index a8578fd09c0..4f92992d0c3 100644 --- a/docs/cache/fix-artifacts/ZSTAC-80937/phase-5.json +++ b/docs/cache/fix-artifacts/ZSTAC-80937/phase-5.json @@ -6,7 +6,7 @@ "data": { "root_cause": "VolumeBase.syncVolumeVolumeSize() updates VolumeVO.size from the agent-reported value but does not send DecreasePrimaryStorageCapacityMsg/IncreasePrimaryStorageCapacityMsg to adjust the primary storage availableCapacity. 
Over time, every syncVolumeSize that reports a different size causes availableCapacity to drift from reality, inflating the storage pool allocation rate.", "bug_location": "storage/src/main/java/org/zstack/storage/volume/VolumeBase.java:syncVolumeVolumeSize()", - "solution": "Record oldSize before the sync RPC. After receiving the reply, compare newSize vs oldSize. If the volume grew, send DecreasePrimaryStorageCapacityMsg with the delta. If it shrank, send IncreasePrimaryStorageCapacityMsg with the absolute delta. This keeps PS availableCapacity in sync with actual volume size changes.", + "solution": "In syncVolumeVolumeSize(), capture oldSize before updating. After dbf.updateAndRefresh(), compare newSize vs oldSize: if size grew, send DecreasePrimaryStorageCapacityMsg with the diff; if size shrank, send IncreasePrimaryStorageCapacityMsg with the abs diff. This keeps PS availableCapacity in sync with actual volume size changes.", "files_modified": [ "storage/src/main/java/org/zstack/storage/volume/VolumeBase.java" ], @@ -27,6 +27,6 @@ }, "fix_reverted": false, "fix_file": "storage/src/main/java/org/zstack/storage/volume/VolumeBase.java", - "fix_description": "After updating VolumeVO.size, compare newSize vs oldSize. If different, send DecreasePrimaryStorageCapacityMsg (size grew) or IncreasePrimaryStorageCapacityMsg (size shrank) to keep PS availableCapacity in sync." + "fix_description": "After updating VolumeVO.size in syncVolumeVolumeSize(), compare newSize vs oldSize and send DecreasePrimaryStorageCapacityMsg (size grew) or IncreasePrimaryStorageCapacityMsg (size shrank) to keep PS availableCapacity in sync." 
} } diff --git a/storage/src/main/java/org/zstack/storage/volume/VolumeBase.java b/storage/src/main/java/org/zstack/storage/volume/VolumeBase.java index 84002c053d4..9e1861d41e1 100755 --- a/storage/src/main/java/org/zstack/storage/volume/VolumeBase.java +++ b/storage/src/main/java/org/zstack/storage/volume/VolumeBase.java @@ -2169,7 +2169,6 @@ private void getPrimaryStorageCapacities(Map ret) { private void syncVolumeVolumeSize(final ReturnValueCompletion completion) { refreshVO(); - long oldSize = self.getSize(); SyncVolumeSizeOnPrimaryStorageMsg smsg = new SyncVolumeSizeOnPrimaryStorageMsg(); smsg.setPrimaryStorageUuid(self.getPrimaryStorageUuid()); smsg.setVolumeUuid(self.getUuid()); @@ -2185,7 +2184,10 @@ public void run(MessageReply reply) { refreshVO(); SyncVolumeSizeOnPrimaryStorageReply r = reply.castReply(); + + long oldSize = self.getSize(); long newSize = r.getSize(); + self.setSize(newSize); if (!r.isWithInternalSnapshot()) { @@ -2196,26 +2198,25 @@ public void run(MessageReply reply) { self.setActualSize(r.getActualSize()); } - // adjust primary storage available capacity when volume size changes - if (self.getPrimaryStorageUuid() != null && newSize != oldSize) { - long sizeDiff = newSize - oldSize; - if (sizeDiff > 0) { + self = dbf.updateAndRefresh(self); + + if (newSize != oldSize && self.getPrimaryStorageUuid() != null) { + long diff = newSize - oldSize; + if (diff > 0) { DecreasePrimaryStorageCapacityMsg dmsg = new DecreasePrimaryStorageCapacityMsg(); dmsg.setPrimaryStorageUuid(self.getPrimaryStorageUuid()); - dmsg.setDiskSize(sizeDiff); + dmsg.setDiskSize(diff); bus.makeTargetServiceIdByResourceUuid(dmsg, PrimaryStorageConstant.SERVICE_ID, self.getPrimaryStorageUuid()); bus.send(dmsg); } else { IncreasePrimaryStorageCapacityMsg imsg = new IncreasePrimaryStorageCapacityMsg(); imsg.setPrimaryStorageUuid(self.getPrimaryStorageUuid()); - imsg.setDiskSize(Math.abs(sizeDiff)); + imsg.setDiskSize(-diff); bus.makeTargetServiceIdByResourceUuid(imsg, 
PrimaryStorageConstant.SERVICE_ID, self.getPrimaryStorageUuid()); bus.send(imsg); } } - self = dbf.updateAndRefresh(self); - VolumeSize size = new VolumeSize(); size.actualSize = self.getActualSize(); size.size = self.getSize();