We are currently building an intra-city active-active setup: each data center runs its own k8s cluster, and some shared volumes need to be available in both of the active-active clusters.

For example, the cluster in the primary data center has the following PVC:

apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  annotations:
    pv.kubernetes.io/bind-completed: "yes"
    pv.kubernetes.io/bound-by-controller: "yes"
    volume.beta.kubernetes.io/storage-provisioner: com.tencent.cloud.csi.cfs
  creationTimestamp: "2020-11-25T07:24:15Z"
  name: public-devops-teststatefulset-nginx-online-pvc
  namespace: test
  uid: 3d65a636-f4c3-4c49-b957-c45673f48bc6
spec:
  accessModes:
  - ReadWriteMany
  resources:
    requests:
      storage: 30Gi
  storageClassName: cfs-hp
  volumeMode: Filesystem
  volumeName: pvc-3d65a636-f4c3-4c49-b957-c45673f48bc6

And the corresponding PV:

apiVersion: v1
kind: PersistentVolume
metadata:
  annotations:
    pv.kubernetes.io/provisioned-by: com.tencent.cloud.csi.cfs
  creationTimestamp: "2020-11-25T07:24:22Z"
  name: pvc-3d65a636-f4c3-4c49-b957-c45673f48bc6
  uid: 301c903e-10fc-40ee-98e1-0655948425fc
spec:
  accessModes:
  - ReadWriteMany
  capacity:
    storage: 30Gi
  claimRef:
    apiVersion: v1
    kind: PersistentVolumeClaim
    name: public-devops-teststatefulset-nginx-online-pvc
    namespace: test
    resourceVersion: "657951844"
    uid: 3d65a636-f4c3-4c49-b957-c45673f48bc6
  csi:
    driver: com.tencent.cloud.csi.cfs
    fsType: ext4
    volumeAttributes:
      fsid: cfs-3c7e956f6
      host: 10.10.144.12
      storage.kubernetes.io/csiProvisionerIdentity: 1597949457398-8081-com.tencent.cloud.csi.cfs
      storagetype: HP
      subnetid: subnet-mg9pp5o5
      vpcid: vpc-aqpp2l40
      zone: ap-shanghai-4
    volumeHandle: cfs-3c7e956f6
  persistentVolumeReclaimPolicy: Delete
  storageClassName: cfs-hp
  volumeMode: Filesystem
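
For reference, the two objects above can be exported from the primary cluster with something along these lines (a sketch; the output file names pvc.yaml and pv.yaml are just placeholders):

# Dump the PVC and PV shown above from the primary cluster
kubectl get pvc -n test public-devops-teststatefulset-nginx-online-pvc -o yaml > pvc.yaml
kubectl get pv pvc-3d65a636-f4c3-4c49-b957-c45673f48bc6 -o yaml > pv.yaml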

If we import the YAML above directly into the cluster in the standby data center, it turns out there is a problem:

[root@sh5-saas-k8scs1-master-online-01 ~]# kubectl  get pvc -n test  -o wide
NAME                                                 STATUS   VOLUME                                     CAPACITY   ACCESS MODES   STORAGECLASS      AGE    VOLUMEMODE
data1-public-devops-teststatefulset-nginx-online-0   Bound    pvc-ad9a8bad-f9e6-4968-a633-d786a1925b38   30Gi       RWO            cbs-ssd-prepaid   123d   Filesystem
data1-public-devops-teststatefulset-nginx-online-1   Bound    pvc-67df7b6f-d5a1-47a3-9239-bc5173b1eb36   30Gi       RWO            cbs-ssd-prepaid   75d    Filesystem
public-devops-teststatefulset-nginx-online-pvc       Bound    pvc-3d65a636-f4c3-4c49-b957-c45673f48bc6   30Gi       RWX            cfs-hp            19s    Filesystem
[root@sh5-saas-k8scs1-master-online-01 ~]# kubectl  get pvc -n test  -o wide
NAME                                                 STATUS   VOLUME                                     CAPACITY   ACCESS MODES   STORAGECLASS      AGE     VOLUMEMODE
data1-public-devops-teststatefulset-nginx-online-0   Bound    pvc-ad9a8bad-f9e6-4968-a633-d786a1925b38   30Gi       RWO            cbs-ssd-prepaid   123d    Filesystem
data1-public-devops-teststatefulset-nginx-online-1   Bound    pvc-67df7b6f-d5a1-47a3-9239-bc5173b1eb36   30Gi       RWO            cbs-ssd-prepaid   75d     Filesystem
public-devops-teststatefulset-nginx-online-pvc       Lost     pvc-3d65a636-f4c3-4c49-b957-c45673f48bc6   0                         cfs-hp            3m57s   Filesystem
[root@sh5-saas-k8scs1-master-online-01 ~]# kubectl describe pvc -n test public-devops-teststatefulset-nginx-online-pvc 
Name:          public-devops-teststatefulset-nginx-online-pvc
Namespace:     test
StorageClass:  cfs-hp
Status:        Lost
Volume:        pvc-3d65a636-f4c3-4c49-b957-c45673f48bc6
Labels:        app=public-devops-teststatefulset-nginx-online
Annotations:   pv.kubernetes.io/bind-completed: yes
               pv.kubernetes.io/bound-by-controller: yes
               volume.beta.kubernetes.io/storage-provisioner: com.tencent.cloud.csi.cfs
Finalizers:    [kubernetes.io/pvc-protection]
Capacity:      0
Access Modes:  
VolumeMode:    Filesystem
Mounted By:    <none>
Events:
  Type     Reason         Age    From                         Message
  ----     ------         ----   ----                         -------
  Warning  ClaimMisbound  4m19s  persistentvolume-controller  Two claims are bound to the same volume, this one is bound incorrectly

The PVC ends up in the Lost state, and its events show the error: "Two claims are bound to the same volume, this one is bound incorrectly".

Searching the Kubernetes source code for this error message leads to pkg/controller/volume/persistentvolume/pv_controller.go:

// syncBoundClaim is the main controller method to decide what to do with a
// bound claim.
func (ctrl *PersistentVolumeController) syncBoundClaim(claim *v1.PersistentVolumeClaim) error {
	// HasAnnotation(pvc, pvutil.AnnBindCompleted)
	// This PVC has previously been bound
	// OBSERVATION: pvc is not "Pending"
	// [Unit test set 3]
	if claim.Spec.VolumeName == "" {
		// Claim was bound before but not any more.
		if _, err := ctrl.updateClaimStatusWithEvent(claim, v1.ClaimLost, nil, v1.EventTypeWarning, "ClaimLost", "Bound claim has lost reference to PersistentVolume. Data on the volume is lost!"); err != nil {
			return err
		}
		return nil
	}
	obj, found, err := ctrl.volumes.store.GetByKey(claim.Spec.VolumeName)
	if err != nil {
		return err
	}
	if !found {
		// Claim is bound to a non-existing volume.
		if _, err = ctrl.updateClaimStatusWithEvent(claim, v1.ClaimLost, nil, v1.EventTypeWarning, "ClaimLost", "Bound claim has lost its PersistentVolume. Data on the volume is lost!"); err != nil {
			return err
		}
		return nil
	} else {
		volume, ok := obj.(*v1.PersistentVolume)
		if !ok {
			return fmt.Errorf("Cannot convert object from volume cache to volume %q!?: %#v", claim.Spec.VolumeName, obj)
		}
		klog.V(4).Infof("synchronizing bound PersistentVolumeClaim[%s]: volume %q found: %s", claimToClaimKey(claim), claim.Spec.VolumeName, getVolumeStatusForLogging(volume))
		if volume.Spec.ClaimRef == nil {
			// Claim is bound but volume has come unbound.
			// Or, a claim was bound and the controller has not received updated
			// volume yet. We can't distinguish these cases.
			// Bind the volume again and set all states to Bound.
			klog.V(4).Infof("synchronizing bound PersistentVolumeClaim[%s]: volume is unbound, fixing", claimToClaimKey(claim))
			if err = ctrl.bind(volume, claim); err != nil {
				// Objects not saved, next syncPV or syncClaim will try again
				return err
			}
			return nil
		} else if volume.Spec.ClaimRef.UID == claim.UID {
			// All is well
			// NOTE: syncPV can handle this so it can be left out.
			// NOTE: bind() call here will do nothing in most cases as
			// everything should be already set.
			klog.V(4).Infof("synchronizing bound PersistentVolumeClaim[%s]: claim is already correctly bound", claimToClaimKey(claim))
			if err = ctrl.bind(volume, claim); err != nil {
				// Objects not saved, next syncPV or syncClaim will try again
				return err
			}
			return nil
		} else {
			// Claim is bound but volume has a different claimant.
			// Set the claim phase to 'Lost', which is a terminal
			// phase.
			if _, err = ctrl.updateClaimStatusWithEvent(claim, v1.ClaimLost, nil, v1.EventTypeWarning, "ClaimMisbound", "Two claims are bound to the same volume, this one is bound incorrectly"); err != nil {
				return err
			}
			return nil
		}
	}
}

From the source code we can see the cause: the PV's claimRef exists, but claimRef.UID does not match the PVC's UID (the if / else-if branches above). The root cause is that although the exported YAML contains a uid, applying it to a new cluster generates a fresh UID, which no longer matches the one recorded in the YAML.
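
In practice this means that, before re-applying the exported manifests, the cluster-generated metadata (uid, resourceVersion, creationTimestamp and, if present, status) and the PV's claimRef have to be stripped. A minimal sketch of that cleanup, assuming yq v4 is installed and reusing the placeholder pvc.yaml / pv.yaml files from the export step above:

# Drop cluster-generated fields from the PVC manifest
yq eval 'del(.metadata.uid) | del(.metadata.resourceVersion) | del(.metadata.creationTimestamp) | del(.status)' pvc.yaml > pvc-clean.yaml
# Same for the PV, plus the claimRef that pins it to the old PVC's UID
yq eval 'del(.metadata.uid) | del(.metadata.resourceVersion) | del(.metadata.creationTimestamp) | del(.spec.claimRef) | del(.status)' pv.yaml > pv-clean.yaml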

So we remove the UID and claimRef. The PVC YAML becomes:

apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  annotations:
    pv.kubernetes.io/bind-completed: "yes"
    pv.kubernetes.io/bound-by-controller: "yes"
    volume.beta.kubernetes.io/storage-provisioner: com.tencent.cloud.csi.cfs
  name: public-devops-teststatefulset-nginx-online-pvc
  namespace: test
spec:
  accessModes:
  - ReadWriteMany
  resources:
    requests:
      storage: 30Gi
  storageClassName: cfs-hp
  volumeMode: Filesystem
  volumeName: pvc-3d65a636-f4c3-4c49-b957-c45673f48bc6

And the PV YAML:

apiVersion: v1
kind: PersistentVolume
metadata:
  annotations:
    pv.kubernetes.io/provisioned-by: com.tencent.cloud.csi.cfs
  name: pvc-3d65a636-f4c3-4c49-b957-c45673f48bc6
spec:
  accessModes:
  - ReadWriteMany
  capacity:
    storage: 30Gi
  csi:
    driver: com.tencent.cloud.csi.cfs
    fsType: ext4
    volumeAttributes:
      fsid: cfs-3c7e956f6
      host: 10.10.144.12
      storage.kubernetes.io/csiProvisionerIdentity: 1597949457398-8081-com.tencent.cloud.csi.cfs
      storagetype: HP
      subnetid: subnet-mg9pp5o5
      vpcid: vpc-aqpp2l40
      zone: ap-shanghai-4
    volumeHandle: cfs-3c7e956f6
  persistentVolumeReclaimPolicy: Delete
  storageClassName: cfs-hp
  volumeMode: Filesystem
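
Applying the cleaned-up manifests to the standby cluster can then look like the following (a sketch, reusing the hypothetical pvc-clean.yaml / pv-clean.yaml names from above; creating the PV first avoids a transient Lost phase, since the PVC already carries the bind-completed annotation and would otherwise reference a volume that does not exist yet):

# Create the PV first, then the PVC that references it via volumeName
kubectl apply -f pv-clean.yaml
kubectl apply -f pvc-clean.yaml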

After applying the YAML above, everything works as expected:

[root@sh5-saas-k8scs1-master-online-01 ~]# kubectl  get pv pvc-3d65a636-f4c3-4c49-b957-c45673f48bc6
NAME                                       CAPACITY   ACCESS MODES   RECLAIM POLICY   STATUS   CLAIM                                                 STORAGECLASS   REASON   AGE
pvc-3d65a636-f4c3-4c49-b957-c45673f48bc6   30Gi       RWX            Delete           Bound    test/public-devops-teststatefulset-nginx-online-pvc   cfs-hp                  4h2m
[root@sh5-saas-k8scs1-master-online-01 ~]# kubectl get pvc -n test public-devops-teststatefulset-nginx-online-pvc -o wide
NAME                                             STATUS   VOLUME                                     CAPACITY   ACCESS MODES   STORAGECLASS   AGE     VOLUMEMODE
public-devops-teststatefulset-nginx-online-pvc   Bound    pvc-3d65a636-f4c3-4c49-b957-c45673f48bc6   30Gi       RWX            cfs-hp         3h53m   Filesystem