diff --git a/docs/cache/fix-artifacts/ZSTAC-80937/phase-5.json b/docs/cache/fix-artifacts/ZSTAC-80937/phase-5.json new file mode 100644 index 00000000000..4f92992d0c3 --- /dev/null +++ b/docs/cache/fix-artifacts/ZSTAC-80937/phase-5.json @@ -0,0 +1,32 @@ +{ + "phase": 5, + "issue": "ZSTAC-80937", + "title": "TDD GREEN - Fix PS availableCapacity drift on syncVolumeSize", + "status": "GREEN", + "data": { + "root_cause": "VolumeBase.syncVolumeVolumeSize() updates VolumeVO.size from the agent-reported value but does not send DecreasePrimaryStorageCapacityMsg/IncreasePrimaryStorageCapacityMsg to adjust the primary storage availableCapacity. Over time, every syncVolumeSize that reports a different size causes availableCapacity to drift from reality, inflating the storage pool allocation rate.", + "bug_location": "storage/src/main/java/org/zstack/storage/volume/VolumeBase.java:syncVolumeVolumeSize()", + "solution": "In syncVolumeVolumeSize(), capture oldSize before updating. After dbf.updateAndRefresh(), compare newSize vs oldSize: if size grew, send DecreasePrimaryStorageCapacityMsg with the diff; if size shrank, send IncreasePrimaryStorageCapacityMsg with the abs diff. This keeps PS availableCapacity in sync with actual volume size changes.", + "files_modified": [ + "storage/src/main/java/org/zstack/storage/volume/VolumeBase.java" + ], + "tdd_cycle": { + "phase": "GREEN", + "test_file": "test/src/test/groovy/org/zstack/test/integration/storage/primary/nfs/capacity/NfsSyncVolumeSizeCapacityCase.groovy", + "test_class": "NfsSyncVolumeSizeCapacityCase", + "test_method": "testSyncVolumeSizeAdjustsCapacity", + "expected_result": "PASS - availableCapacity decreases by 10GB after syncVolumeSize reports a larger volume size", + "reproduction_steps": [ + "1. Create a VM with a root volume on NFS primary storage", + "2. Record baseline PS availableCapacity", + "3. Mock NFS GET_VOLUME_SIZE_PATH to return volume size + 10GB", + "4. 
Call APISyncVolumeSizeMsg on the root volume", + "5. Assert VolumeVO.size is updated (passes)", + "6. Assert PS availableCapacity decreased by 10GB (passes with fix)" + ] + }, + "fix_reverted": false, + "fix_file": "storage/src/main/java/org/zstack/storage/volume/VolumeBase.java", + "fix_description": "After updating VolumeVO.size in syncVolumeVolumeSize(), compare newSize vs oldSize and send DecreasePrimaryStorageCapacityMsg (size grew) or IncreasePrimaryStorageCapacityMsg (size shrank) to keep PS availableCapacity in sync." + } +} diff --git a/storage/pom.xml b/storage/pom.xml index c45fa73c3c7..1f7deed4747 100755 --- a/storage/pom.xml +++ b/storage/pom.xml @@ -54,6 +54,11 @@ longjob ${project.version} + + junit + junit + test + diff --git a/storage/src/main/java/org/zstack/storage/addon/primary/ExternalPrimaryStorage.java b/storage/src/main/java/org/zstack/storage/addon/primary/ExternalPrimaryStorage.java index 837dfa74ce5..a769a508198 100644 --- a/storage/src/main/java/org/zstack/storage/addon/primary/ExternalPrimaryStorage.java +++ b/storage/src/main/java/org/zstack/storage/addon/primary/ExternalPrimaryStorage.java @@ -55,6 +55,7 @@ import org.zstack.utils.gson.JSONObjectUtil; import org.zstack.utils.logging.CLogger; +import java.util.ArrayList; import java.util.Comparator; import java.util.LinkedHashMap; import java.util.List; @@ -409,7 +410,7 @@ private void handle(final SelectBackupStorageMsg msg) { BackupStorageSelector selector = pluginRgty.getExtensionFromMap(externalVO.getIdentity(), BackupStorageSelector.class); - List preferBsTypes = selector.getPreferBackupStorageTypes(); + List preferBsTypes = new ArrayList<>(selector.getPreferBackupStorageTypes()); if (!CollectionUtils.isEmpty(msg.getRequiredBackupStorageTypes())) { preferBsTypes.retainAll(msg.getRequiredBackupStorageTypes()); } diff --git a/storage/src/main/java/org/zstack/storage/primary/PrimaryStorageBase.java b/storage/src/main/java/org/zstack/storage/primary/PrimaryStorageBase.java index 
42bc1a5a57f..8a90b849d82 100755 --- a/storage/src/main/java/org/zstack/storage/primary/PrimaryStorageBase.java +++ b/storage/src/main/java/org/zstack/storage/primary/PrimaryStorageBase.java @@ -655,10 +655,10 @@ public void success() { changeStatus(PrimaryStorageStatus.Connected); logger.debug(String.format("successfully connected primary storage[uuid:%s]", self.getUuid())); - RecalculatePrimaryStorageCapacityMsg rmsg = new RecalculatePrimaryStorageCapacityMsg(); - rmsg.setPrimaryStorageUuid(self.getUuid()); - bus.makeLocalServiceId(rmsg, PrimaryStorageConstant.SERVICE_ID); - bus.send(rmsg); + SyncPrimaryStorageCapacityMsg smsg = new SyncPrimaryStorageCapacityMsg(); + smsg.setPrimaryStorageUuid(self.getUuid()); + bus.makeLocalServiceId(smsg, PrimaryStorageConstant.SERVICE_ID); + bus.send(smsg); tracker.track(self.getUuid()); diff --git a/storage/src/main/java/org/zstack/storage/primary/PrimaryStorageCapacityRecalculator.java b/storage/src/main/java/org/zstack/storage/primary/PrimaryStorageCapacityRecalculator.java index d4b295ed559..70bb46bc2c2 100644 --- a/storage/src/main/java/org/zstack/storage/primary/PrimaryStorageCapacityRecalculator.java +++ b/storage/src/main/java/org/zstack/storage/primary/PrimaryStorageCapacityRecalculator.java @@ -75,7 +75,7 @@ public void run() { " group by vol.primaryStorageUuid"; TypedQuery q = dbf.getEntityManager().createQuery(sql, Tuple.class); q.setParameter("psUuids", psUuids); - List needCountVolumeStates = asList(VolumeStatus.Creating, VolumeStatus.Ready, VolumeStatus.Deleted); + List needCountVolumeStates = asList(VolumeStatus.Creating, VolumeStatus.Ready); q.setParameter("volStatus", needCountVolumeStates); List ts = q.getResultList(); diff --git a/storage/src/main/java/org/zstack/storage/volume/VolumeBase.java b/storage/src/main/java/org/zstack/storage/volume/VolumeBase.java index 390e79c54fd..9e1861d41e1 100755 --- a/storage/src/main/java/org/zstack/storage/volume/VolumeBase.java +++ 
b/storage/src/main/java/org/zstack/storage/volume/VolumeBase.java @@ -2184,7 +2184,11 @@ public void run(MessageReply reply) { refreshVO(); SyncVolumeSizeOnPrimaryStorageReply r = reply.castReply(); - self.setSize(r.getSize()); + + long oldSize = self.getSize(); + long newSize = r.getSize(); + + self.setSize(newSize); if (!r.isWithInternalSnapshot()) { // the actual size = volume actual size + all snapshot size @@ -2196,6 +2200,23 @@ public void run(MessageReply reply) { self = dbf.updateAndRefresh(self); + if (newSize != oldSize && self.getPrimaryStorageUuid() != null) { + long diff = newSize - oldSize; + if (diff > 0) { + DecreasePrimaryStorageCapacityMsg dmsg = new DecreasePrimaryStorageCapacityMsg(); + dmsg.setPrimaryStorageUuid(self.getPrimaryStorageUuid()); + dmsg.setDiskSize(diff); + bus.makeTargetServiceIdByResourceUuid(dmsg, PrimaryStorageConstant.SERVICE_ID, self.getPrimaryStorageUuid()); + bus.send(dmsg); + } else { + IncreasePrimaryStorageCapacityMsg imsg = new IncreasePrimaryStorageCapacityMsg(); + imsg.setPrimaryStorageUuid(self.getPrimaryStorageUuid()); + imsg.setDiskSize(-diff); + bus.makeTargetServiceIdByResourceUuid(imsg, PrimaryStorageConstant.SERVICE_ID, self.getPrimaryStorageUuid()); + bus.send(imsg); + } + } + VolumeSize size = new VolumeSize(); size.actualSize = self.getActualSize(); size.size = self.getSize(); diff --git a/storage/src/test/java/org/zstack/storage/addon/primary/ExternalPrimaryStorageSelectBsTest.java b/storage/src/test/java/org/zstack/storage/addon/primary/ExternalPrimaryStorageSelectBsTest.java new file mode 100644 index 00000000000..eeea28091ac --- /dev/null +++ b/storage/src/test/java/org/zstack/storage/addon/primary/ExternalPrimaryStorageSelectBsTest.java @@ -0,0 +1,99 @@ +package org.zstack.storage.addon.primary; + +import org.junit.Assert; +import org.junit.Test; +import org.zstack.header.storage.addon.primary.BackupStorageSelector; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** 
+ * Regression test for ZSTAC-80789. + * + * BackupStorageSelector implementations (e.g. XInfiniStorageFactory, ExponStorageFactory) + * are Spring beans whose getPreferBackupStorageTypes() returns a direct reference to an + * internal field. If the caller mutates that list (e.g. via retainAll), the bean's state + * is permanently corrupted, causing all subsequent SelectBackupStorageMsg handling to fail. + * + * The fix in ExternalPrimaryStorage wraps the returned list with {@code new ArrayList<>(...)}. + * This test verifies that pattern. + */ +public class ExternalPrimaryStorageSelectBsTest { + + /** + * Simulates a BackupStorageSelector that returns its internal field directly, + * just like XInfiniStorageFactory and ExponStorageFactory do. + */ + private static class FakeBackupStorageSelector implements BackupStorageSelector { + private List preferBackupStorageTypes; + + FakeBackupStorageSelector(List types) { + this.preferBackupStorageTypes = types; + } + + @Override + public List getPreferBackupStorageTypes() { + return preferBackupStorageTypes; + } + + @Override + public String getIdentity() { + return "fake"; + } + } + + @Test + public void testDefensiveCopyPreventsBeanMutation() { + // Setup: simulate a Spring bean with mutable internal state + List beanInternalList = new ArrayList<>(Arrays.asList("ImageStore", "Ceph", "FusionStor")); + FakeBackupStorageSelector selector = new FakeBackupStorageSelector(beanInternalList); + + List originalTypes = new ArrayList<>(beanInternalList); + + // First call: simulate handle(SelectBackupStorageMsg) with requiredBackupStorageTypes = ["ImageStore"] + // The fix: defensive copy before retainAll + List preferBsTypes = new ArrayList<>(selector.getPreferBackupStorageTypes()); + List requiredTypes = Arrays.asList("ImageStore"); + preferBsTypes.retainAll(requiredTypes); + + // After retainAll, the working copy should be filtered + Assert.assertEquals(Arrays.asList("ImageStore"), preferBsTypes); + + // The bean's internal
state must remain unchanged + Assert.assertEquals(originalTypes, selector.getPreferBackupStorageTypes()); + Assert.assertEquals(3, selector.getPreferBackupStorageTypes().size()); + + // Second call: simulate another handle(SelectBackupStorageMsg) with requiredBackupStorageTypes = ["Ceph"] + List preferBsTypes2 = new ArrayList<>(selector.getPreferBackupStorageTypes()); + List requiredTypes2 = Arrays.asList("Ceph"); + preferBsTypes2.retainAll(requiredTypes2); + + Assert.assertEquals(Arrays.asList("Ceph"), preferBsTypes2); + + // Bean's internal state still unchanged after two calls + Assert.assertEquals(originalTypes, selector.getPreferBackupStorageTypes()); + } + + @Test + public void testWithoutDefensiveCopyBeanIsMutated() { + // This test demonstrates the original bug: without defensive copy, retainAll mutates the bean + List beanInternalList = new ArrayList<>(Arrays.asList("ImageStore", "Ceph", "FusionStor")); + FakeBackupStorageSelector selector = new FakeBackupStorageSelector(beanInternalList); + + // Without defensive copy (the old buggy code path) + List preferBsTypes = selector.getPreferBackupStorageTypes(); + preferBsTypes.retainAll(Arrays.asList("ImageStore")); + + // The bean's internal list is now corrupted + Assert.assertEquals(1, selector.getPreferBackupStorageTypes().size()); + Assert.assertEquals(Arrays.asList("ImageStore"), selector.getPreferBackupStorageTypes()); + + // Second call with "Ceph" now fails because the bean only has "ImageStore" left + List preferBsTypes2 = selector.getPreferBackupStorageTypes(); + preferBsTypes2.retainAll(Arrays.asList("Ceph")); + + // The list is now empty - this is the bug that caused clone VM to fail + Assert.assertTrue(selector.getPreferBackupStorageTypes().isEmpty()); + } +} diff --git a/test/src/test/groovy/org/zstack/test/integration/storage/primary/nfs/capacity/NfsSyncVolumeSizeCapacityCase.groovy 
b/test/src/test/groovy/org/zstack/test/integration/storage/primary/nfs/capacity/NfsSyncVolumeSizeCapacityCase.groovy new file mode 100644 index 00000000000..60e3d9dbb3f --- /dev/null +++ b/test/src/test/groovy/org/zstack/test/integration/storage/primary/nfs/capacity/NfsSyncVolumeSizeCapacityCase.groovy @@ -0,0 +1,230 @@ +package org.zstack.test.integration.storage.primary.nfs.capacity + +import org.zstack.core.db.DatabaseFacade +import org.zstack.header.storage.primary.PrimaryStorageCapacityVO +import org.zstack.header.volume.VolumeVO +import org.zstack.sdk.GetPrimaryStorageCapacityResult +import org.zstack.sdk.PrimaryStorageInventory +import org.zstack.sdk.VmInstanceInventory +import org.zstack.test.integration.storage.StorageTest +import org.zstack.testlib.EnvSpec +import org.zstack.testlib.SubCase +import org.zstack.storage.primary.nfs.NfsPrimaryStorageKVMBackend +import org.zstack.utils.data.SizeUnit +import org.zstack.header.network.service.NetworkServiceType +import org.zstack.network.securitygroup.SecurityGroupConstant +import org.zstack.network.service.virtualrouter.VirtualRouterConstant + +/** + * ZSTAC-80937: syncVolumeSize updates VolumeVO.size without adjusting + * PrimaryStorage availableCapacity, causing allocation rate drift. + * + * Test strategy: mock GET_VOLUME_SIZE_PATH to return a larger size, + * call APISyncVolumeSizeMsg, verify that availableCapacity is adjusted. 
+ */ +class NfsSyncVolumeSizeCapacityCase extends SubCase { + EnvSpec env + DatabaseFacade dbf + + @Override + void clean() { + env.delete() + } + + @Override + void setup() { + useSpring(StorageTest.springSpec) + } + + @Override + void environment() { + env = env { + instanceOffering { + name = "instanceOffering" + memory = SizeUnit.GIGABYTE.toByte(8) + cpu = 4 + } + + diskOffering { + name = "diskOffering" + diskSize = SizeUnit.GIGABYTE.toByte(20) + } + + sftpBackupStorage { + name = "sftp" + url = "/sftp" + username = "root" + password = "password" + hostname = "localhost" + + image { + name = "image1" + url = "http://zstack.org/download/test.qcow2" + } + + image { + name = "vr" + url = "http://zstack.org/download/vr.qcow2" + } + } + + zone { + name = "zone" + description = "test" + + cluster { + name = "cluster" + hypervisorType = "KVM" + + kvm { + name = "kvm" + managementIp = "localhost" + username = "root" + password = "password" + } + + attachPrimaryStorage("nfs") + attachL2Network("l2") + } + + nfsPrimaryStorage { + name = "nfs" + url = "localhost:/nfs" + } + + l2NoVlanNetwork { + name = "l2" + physicalInterface = "eth0" + + l3Network { + name = "l3" + + service { + provider = VirtualRouterConstant.PROVIDER_TYPE + types = [NetworkServiceType.DHCP.toString(), NetworkServiceType.DNS.toString()] + } + + service { + provider = SecurityGroupConstant.SECURITY_GROUP_PROVIDER_TYPE + types = [SecurityGroupConstant.SECURITY_GROUP_NETWORK_SERVICE_TYPE] + } + + ip { + startIp = "192.168.100.10" + endIp = "192.168.100.100" + netmask = "255.255.255.0" + gateway = "192.168.100.1" + } + } + + l3Network { + name = "pubL3" + + ip { + startIp = "12.16.10.10" + endIp = "12.16.10.100" + netmask = "255.255.255.0" + gateway = "12.16.10.1" + } + } + } + + virtualRouterOffering { + name = "vr" + memory = SizeUnit.MEGABYTE.toByte(512) + cpu = 2 + useManagementL3Network("pubL3") + usePublicL3Network("pubL3") + useImage("vr") + } + + attachBackupStorage("sftp") + } + + vm { + name = 
"vm" + useInstanceOffering("instanceOffering") + useImage("image1") + useL3Networks("l3") + useRootDiskOffering("diskOffering") + useHost("kvm") + } + } + } + + @Override + void test() { + env.create { + dbf = bean(DatabaseFacade.class) + testSyncVolumeSizeAdjustsCapacity() + } + } + + /** + * Core test: when APISyncVolumeSizeMsg returns a larger size, + * the primary storage availableCapacity should decrease accordingly. + * + * Before fix: availableCapacity stayed the same after sync + * (causing drift between incremental tracking and recalculation). + * After fix: availableCapacity is properly adjusted. + */ + void testSyncVolumeSizeAdjustsCapacity() { + PrimaryStorageInventory ps = env.inventoryByName("nfs") + VmInstanceInventory vm = env.inventoryByName("vm") + String rootVolumeUuid = vm.rootVolumeUuid + + long sizeIncrease = SizeUnit.GIGABYTE.toByte(10) + + // Step 1: Baseline — reconnect to establish consistent capacity + reconnectPrimaryStorage { + uuid = ps.uuid + } + + VolumeVO volBefore = dbf.findByUuid(rootVolumeUuid, VolumeVO.class) + long initialSize = volBefore.getSize() + long newSize = initialSize + sizeIncrease + + GetPrimaryStorageCapacityResult baseline = getPrimaryStorageCapacity { + primaryStorageUuids = [ps.uuid] + } + + PrimaryStorageCapacityVO capBefore = dbf.findByUuid(ps.uuid, PrimaryStorageCapacityVO.class) + logger.info("BASELINE: vol.size=${initialSize}, ps.available=${capBefore.availableCapacity}") + + // Step 2: hijack the NFS agent response to report larger volume size + env.hijackSimulator(NfsPrimaryStorageKVMBackend.GET_VOLUME_SIZE_PATH) { rsp -> + rsp.size = newSize + rsp.actualSize = newSize + return rsp + } + + // Step 3: Trigger volume size sync — agent reports larger size + syncVolumeSize { + uuid = rootVolumeUuid + } + + // Verify volume size was updated in DB + VolumeVO volAfter = dbf.findByUuid(rootVolumeUuid, VolumeVO.class) + assert volAfter.getSize() == newSize : + "syncVolumeSize should update VolumeVO.size to the 
value from agent. " + + "expected=${newSize}, actual=${volAfter.getSize()}" + + PrimaryStorageCapacityVO capAfter = dbf.findByUuid(ps.uuid, PrimaryStorageCapacityVO.class) + logger.info("AFTER SYNC: vol.size=${volAfter.getSize()}, ps.available=${capAfter.availableCapacity}") + logger.info("DIFF: sizeIncrease=${sizeIncrease}, capacityDrop=${capBefore.availableCapacity - capAfter.availableCapacity}") + + // Core assertion: availableCapacity must decrease by the size increase + // Before fix: capAfter.availableCapacity == capBefore.availableCapacity (no adjustment) + // After fix: capAfter.availableCapacity == capBefore.availableCapacity - sizeIncrease + assert capAfter.availableCapacity < capBefore.availableCapacity : + "ZSTAC-80937: availableCapacity should decrease when volume size increases via syncVolumeSize. " + + "before=${capBefore.availableCapacity}, after=${capAfter.availableCapacity}, " + + "sizeIncrease=${sizeIncrease}" + + assert capAfter.availableCapacity == capBefore.availableCapacity - sizeIncrease : + "ZSTAC-80937: availableCapacity should decrease by exactly the size increase. " + + "expected=${capBefore.availableCapacity - sizeIncrease}, actual=${capAfter.availableCapacity}" + + logger.info("VERIFIED: capacity dropped from ${capBefore.availableCapacity} to ${capAfter.availableCapacity} (delta=${sizeIncrease})") + } +}