openstack冷迁移/Resize源码分析(二)

接上一篇 openstack冷迁移/Resize源码分析(一)

执行冷迁移/Resize

/nova_queens/nova/compute/manager.py

    @wrap_exception()
    @reverts_task_state
    @wrap_instance_event(prefix='compute')
    @wrap_instance_fault
    def resize_instance(self, context, instance, image,
                        migration, instance_type, clean_shutdown,
                        request_spec=None):
        """Start migrating a running instance to another host.

        This is initiated from the destination host's ``prep_resize``
        routine and runs on the source host.

        Any exception raised by ``_resize_instance`` triggers
        ``_revert_allocation`` for the instance/migration and is then
        re-raised.
        """
        try:
            self._resize_instance(context, instance, image, migration,
                                  instance_type, clean_shutdown, request_spec)
        except Exception:
            with excutils.save_and_reraise_exception():
                self._revert_allocation(context, instance, migration)

    def _resize_instance(self, context, instance, image,
                         migration, instance_type, clean_shutdown,
                         request_spec):
        """Do the source-host half of a resize / cold migration.

        Moves the migration through 'migrating' -> 'post-migrating' and the
        instance task_state through RESIZE_MIGRATING -> RESIZE_MIGRATED,
        asks the virt driver to power off the guest and move its disks, then
        hands over to the destination host via the ``finish_resize`` RPC.
        """
        # Pass instance_state=instance.vm_state because a STOPPED server can
        # be resized too, and we do not want it set back to ACTIVE if
        # migrate_disk_and_power_off raises InstanceFaultRollback.
        instance_state = instance.vm_state
        with self._error_out_instance_on_exception(
                context, instance, instance_state=instance_state), \
                errors_out_migration_ctxt(migration):
            # Fetch the instance's network info.
            network_info = self.network_api.get_instance_nw_info(context,
                                                                 instance)

            migration.status = 'migrating'
            migration.save()

            instance.task_state = task_states.RESIZE_MIGRATING
            instance.save(expected_task_state=task_states.RESIZE_PREP)

            # Fetch the instance's block device mappings.
            bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
                context, instance.uuid)
            self._send_resize_instance_notifications(
                context, instance, bdms, network_info,
                fields.NotificationPhase.START)

            block_device_info = self._get_instance_block_device_info(
                context, instance, bdms=bdms)

            # Power-off timeout and retry interval for the guest.
            timeout, retry_interval = self._get_power_off_values(
                instance, clean_shutdown)

            # Power off the guest and migrate its disk files; the work is
            # done by the virt driver's migrate_disk_and_power_off.
            disk_info = self.driver.migrate_disk_and_power_off(
                context, instance, migration.dest_host,
                instance_type, network_info,
                block_device_info,
                timeout, retry_interval)

            # Terminate the instance's volume connections.
            self._terminate_volume_connections(context, instance, bdms)

            # Start migrating the instance's networking.
            self.network_api.migrate_instance_start(context,
                                                    instance,
                                                    migration)

            migration.status = 'post-migrating'
            migration.save()

            # Point the instance record at the destination host/node.
            instance.host = migration.dest_compute
            instance.node = migration.dest_node
            instance.task_state = task_states.RESIZE_MIGRATED
            instance.save(expected_task_state=task_states.RESIZE_MIGRATING)

            # RPC to the destination host to finish the resize/migration
            # (handled by finish_resize in nova/compute/manager.py).
            self.compute_rpcapi.finish_resize(
                context, instance,
                migration, image, disk_info, migration.dest_compute,
                request_spec)

        self._send_resize_instance_notifications(
            context, instance, bdms, network_info,
            fields.NotificationPhase.END)
        self.instance_events.clear_events_for_instance(instance)

关闭电源+磁盘迁移

/nova_queens/nova/virt/libvirt/driver.py

    def migrate_disk_and_power_off(self, context, instance, dest,
                                   flavor, network_info,
                                   block_device_info=None,
                                   timeout=0, retry_interval=0):
        """Power off the guest and copy its local disks towards ``dest``.

        Rejects requests that would shrink a local disk and (non
        volume-backed) LVM instances, renames the instance directory to
        ``<dir>_resize``, copies every disk except ``disk.swap`` into the
        new instance directory and returns the disk info as a JSON string.

        :raises exception.InstanceFaultRollback: when the pre-checks fail or
            the remote directory cannot be created.
        """
        LOG.debug("Starting migrate_disk_and_power_off",
                  instance=instance)

        # Ephemeral disk information.
        ephemerals = driver.block_device_info_get_ephemerals(block_device_info)

        # get_bdm_ephemeral_disk_size() returns 0 if the requested block
        # device mapping contains no ephemeral devices. We still want to
        # check whether the original instance's ephemeral_gb was set and make
        # sure the newly requested flavor's ephemeral size is not smaller.
        eph_size = (block_device.get_bdm_ephemeral_disk_size(ephemerals) or
                    instance.flavor.ephemeral_gb)

        # Check whether the migration would need to shrink a disk.
        root_down = flavor.root_gb < instance.flavor.root_gb
        ephemeral_down = flavor.ephemeral_gb < eph_size
        # Check whether the instance is booted from a volume.
        booted_from_volume = self._is_booted_from_volume(block_device_info)

        # Local disks cannot be resized down.
        if (root_down and not booted_from_volume) or ephemeral_down:
            reason = _("Unable to resize disk down.")
            raise exception.InstanceFaultRollback(
                exception.ResizeError(reason=reason))

        # NOTE(dgenin): Migration is not implemented for LVM backed
        # instances, i.e. an LVM image backend combined with a guest that is
        # not booted from a volume is rejected.
        if CONF.libvirt.images_type == 'lvm' and not booted_from_volume:
            reason = _("Migration is not supported for LVM backed instances")
            raise exception.InstanceFaultRollback(
                exception.MigrationPreCheckError(reason=reason))

        # Copy disks to the destination. The instance directory is first
        # renamed to <dir>_resize so that a shared instance directory can be
        # handled as well.
        inst_base = libvirt_utils.get_instance_path(instance)
        inst_base_resize = inst_base + "_resize"
        # Is the instance path shared with the destination?
        shared_instance_path = self._is_path_shared_with(dest, inst_base)

        # Try to create the directory on the remote compute node; on failure
        # the exception is passed up the stack so the problem is caught
        # early.
        if not shared_instance_path:
            try:
                # Non-shared storage: create the instance directory on the
                # destination host.
                self._remotefs.create_dir(dest, inst_base)
            except processutils.ProcessExecutionError as e:
                reason = _("not able to execute ssh command: %s") % e
                raise exception.InstanceFaultRollback(
                    exception.ResizeError(reason=reason))

        # Power off the guest.
        self.power_off(instance, timeout, retry_interval)

        # Disconnect the attached volumes.
        block_device_mapping = driver.block_device_info_get_mapping(
            block_device_info)
        for vol in block_device_mapping:
            connection_info = vol['connection_info']
            self._disconnect_volume(context, connection_info, instance)

        # Disk info of the instance; each entry is read below through its
        # 'path' and 'type' keys.
        disk_info = self._get_instance_disk_info(instance, block_device_info)

        try:
            os.rename(inst_base, inst_base_resize)
            # If the instance path is shared, create the directory locally.
            # For a remote node the directory has already been created above.
            if shared_instance_path:
                # Shared storage: there is no remote host to copy to.
                dest = None
                fileutils.ensure_tree(inst_base)

            on_execute = lambda process: \
                self.job_tracker.add_job(instance, process.pid)
            on_completion = lambda process: \
                self.job_tracker.remove_job(instance, process.pid)

            # Copy the instance's local disk files.
            for info in disk_info:
                # Assume inst_base == dirname(info['path']).
                img_path = info['path']
                fname = os.path.basename(img_path)
                from_path = os.path.join(inst_base_resize, fname)

                # The swap disk is not copied here; finish_migration is
                # relied on to re-create it. That is fine because the guest
                # OS is shut down, and re-creating a swap disk is cheaper
                # than copying it locally or over the network. It also means
                # it does not have to be resized.
                if fname == 'disk.swap':
                    continue

                # Whether to enable compression for this disk type.
                compression = info['type'] not in NO_COMPRESSION_TYPES
                # NOTE(review): host=dest is None for shared storage --
                # presumably a local copy then, and a remote copy otherwise;
                # confirm against libvirt_utils.copy_image.
                libvirt_utils.copy_image(from_path, img_path, host=dest,
                                         on_execute=on_execute,
                                         on_completion=on_completion,
                                         compression=compression)

            # Make sure disk.info is written to the new path to avoid the
            # disks being re-inspected and potentially changing format.
            src_disk_info_path = os.path.join(inst_base_resize, 'disk.info')
            if os.path.exists(src_disk_info_path):
                dst_disk_info_path = os.path.join(inst_base, 'disk.info')
                libvirt_utils.copy_image(src_disk_info_path,
                                         dst_disk_info_path,
                                         host=dest, on_execute=on_execute,
                                         on_completion=on_completion)

            # Handle migration of vTPM data if needed.
            libvirt_utils.save_and_migrate_vtpm_dir(
                instance.uuid, inst_base_resize, inst_base, dest,
                on_execute, on_completion)

        except Exception:
            with excutils.save_and_reraise_exception():
                self._cleanup_remote_migration(dest, inst_base,
                                               inst_base_resize,
                                               shared_instance_path)

        return jsonutils.dumps(disk_info)

/nova_queens/nova/compute/manager.py

    @wrap_exception()
    @reverts_task_state
    @wrap_instance_event(prefix='compute')
    @errors_out_migration
    @wrap_instance_fault
    def finish_resize(self, context, disk_info, image, instance,
                      migration, request_spec=None):
        """Complete the migration process.

        Sets up the newly transferred disk and powers the instance on, on
        its new host. The work is delegated to ``_finish_resize_helper``; on
        any exception the allocations are cleaned up through
        ``_delete_allocation_after_move`` and the exception is re-raised.
        """
        try:
            # Delegate to _finish_resize_helper in this same file.
            self._finish_resize_helper(context, disk_info, image, instance,
                                       migration, request_spec)
        except Exception:
            with excutils.save_and_reraise_exception():
                # At this point resize_instance (which runs on the source)
                # has already updated the instance host/node values to point
                # at this (the dest) compute, so the allocations against the
                # dest node resource provider must stay intact and the ones
                # against the source node resource provider are dropped. If
                # the user tries to recover the server by hard rebooting it,
                # that happens on this host, so that is where the allocations
                # should go. Note that this is the same method confirm_resize
                # calls to clean up the source node allocations held by the
                # migration record.
                #
                # In short: after a finish_resize failure, delete the old
                # flavor's allocations on the source node; the instance may
                # be recoverable by a hard reboot.
                LOG.info('Deleting allocations for old flavor on source node '
                         '%s after finish_resize failure. You may be able to '
                         'recover the instance by hard rebooting it.',
                         migration.source_compute, instance=instance)
                self._delete_allocation_after_move(
                    context, instance, migration)

同一个py文件

    def _finish_resize_helper(self, context, disk_info, image, instance,
                              migration, request_spec=None):
        """Complete the migration process.

        The caller must revert the instance's allocations if the migration
        process fails.

        :param request_spec: accepted because ``finish_resize`` forwards it
            as a sixth positional argument; the statements shown in this
            excerpt do not use it.
        """
        ...
        # Delegate to _finish_resize in this same file.
        # NOTE(review): image_meta and bdms are defined in the elided part
        # of this excerpt.
        network_info = self._finish_resize(context, instance, migration,
                                           disk_info, image_meta, bdms)
        ...

同一个py文件

    def _finish_resize(self, context, instance, migration, disk_info,
                       image_meta, bdms):
        # Excerpt of the destination-host finish step: `...` marks code the
        # article elided, so names such as old_instance_type_id, key and
        # old_vm_state are defined in parts that are not shown here.
        resize_instance = False
        ...
        # Differing flavor ids mean this is a resize rather than a plain
        # cold migration.
        if old_instance_type_id != new_instance_type_id:
            ...
            if old_instance_type[key] != instance_type[key]:
                resize_instance = True
        ...
        # Set up networking on the destination host.
        self.network_api.setup_networks_on_host(context, instance,
                                                migration['dest_compute'])
        ...
        # Current network info of the instance.
        network_info = self.network_api.get_instance_nw_info(context, instance)
        # Move the task state to RESIZE_FINISH and persist it.
        instance.task_state = task_states.RESIZE_FINISH
        instance.save(expected_task_state=task_states.RESIZE_MIGRATED)
        ...
        # Update all volume attachments with the destination host connector
        # so BDM.connection_info is refreshed before driver.finish_migration
        # is called; otherwise the driver would not know how to connect the
        # volumes to this host.
        self._update_volume_attachments(context, instance, bdms)
        block_device_info = self._get_instance_block_device_info(
            context, instance, refresh_conn_info=True, bdms=bdms)
        # If the original vm_state was STOPPED, do not power the instance on
        # after it has been migrated.
        power_on = old_vm_state != vm_states.STOPPED
        try:
            # Hand over to the virt driver (finish_migration in
            # nova/virt/libvirt/driver.py for libvirt).
            self.driver.finish_migration(context, migration, instance,
                                         disk_info,
                                         network_info,
                                         image_meta, resize_instance,
                                         block_device_info, power_on)
        # The handler for this try block is elided in the excerpt.
        ...

完成迁移,libvirt根据xml启动虚拟机

nova_queens/nova/virt/libvirt/driver.py

    def finish_migration(self, context, migration, instance, disk_info,
                         network_info, image_meta, resize_instance,
                         block_device_info=None, power_on=True):
        """Recreate the guest on this host from the migrated disks.

        Excerpt: `...` marks code the article elided.
        """
        LOG.debug("Starting finish_migration", instance=instance)

        # Disk layout for the guest.
        block_disk_info = blockinfo.get_disk_info(CONF.libvirt.virt_type,
                                                  instance,
                                                  image_meta,
                                                  block_device_info)
        # Create the images.
        self._create_image(context, instance, block_disk_info['mapping'],
                           block_device_info=block_device_info,
                           ignore_bdi_for_swap=True,
                           fallback_from_host=migration.source_compute)

        # disk_info arrives as a JSON string; decode it.
        disk_info = jsonutils.loads(disk_info)
        ...
        # NOTE(review): `info` and `disk_name` come from the elided code --
        # presumably a loop over the decoded disk_info entries; confirm
        # against the full source.
        if (disk_name != 'disk.config' and
                info['type'] == 'raw' and CONF.use_cow_images):
            self._disk_raw_to_qcow2(info['path'])

        # Generate the guest's domain XML.
        xml = self._get_guest_xml(context, instance, network_info,
                                  block_disk_info, image_meta,
                                  block_device_info=block_device_info)
        # Create the guest from the XML and the configuration above
        # (gen_confdrive is defined in the elided part).
        guest = self._create_domain_and_network(
            context, xml, instance,
            network_info,
            block_device_info=block_device_info,
            power_on=power_on,
            vifs_already_plugged=True,
            post_xml_callback=gen_confdrive)
        # If the instance is to be powered on, wait until it is running.
        if power_on:
            timer = loopingcall.FixedIntervalLoopingCall(
                self._wait_for_running,
                instance)
            timer.start(interval=0.5).wait()

            # Sync the guest clock after the migration.
            guest.sync_guest_time()

        LOG.debug("finish_migration finished successfully.", instance=instance)

进行confirm_resize检查,成功则清理源主机,失败则回滚

nova_queens/nova/api/openstack/compute/servers.py

    @wsgi.response(204)
    @wsgi.expected_errors((400, 404, 409))
    @wsgi.action('confirmResize')
    def _action_confirm_resize(self, req, id, body):
        """Handle the ``confirmResize`` server action.

        Checks the ``confirm_resize`` policy, loads the server and asks the
        compute API to confirm the resize, translating compute errors into
        HTTP 404 / 400 / 409 responses.
        """
        ctxt = req.environ['nova.context']
        ctxt.can(server_policies.SERVERS % 'confirm_resize')
        # Load the instance.
        server = self._get_server(ctxt, req, id)
        try:
            # Handled by confirm_resize in nova/compute/api.py.
            self.compute_api.confirm_resize(ctxt, server)
        except exception.InstanceUnknownCell as err:
            raise exc.HTTPNotFound(explanation=err.format_message())
        except exception.MigrationNotFound:
            raise exc.HTTPBadRequest(
                explanation=_("Instance has not been resized."))
        except exception.InstanceIsLocked as err:
            raise exc.HTTPConflict(explanation=err.format_message())
        except exception.InstanceInvalidState as err:
            common.raise_http_conflict_for_instance_invalid_state(
                err, 'confirmResize', id)

nova_queens/nova/compute/api.py

    @check_instance_lock
    @check_instance_cell
    @check_instance_state(vm_state=[vm_states.RESIZED])
    def confirm_resize(self, context, instance, migration=None):
        """Confirm a migration/resize and delete the 'old' instance.

        :param migration: migration record to confirm; when None, the
            instance's migration in status 'finished' is looked up.
        """
        elevated = context.elevated()
        # No migration was passed in: look up this instance's migration
        # record whose status is 'finished'.
        if migration is None:
            migration = objects.Migration.get_by_instance_and_status(
                elevated, instance.uuid, 'finished')

        # Mark the migration as being confirmed (done in both cases).
        migration.status = 'confirming'
        migration.save()

        # Record the start of the confirm-resize instance action.
        self._record_action_start(context, instance,
                                  instance_actions.CONFIRM_RESIZE)

        # RPC to the source compute host (confirm_resize in
        # nova/compute/manager.py).
        self.compute_rpcapi.confirm_resize(context,
                                           instance,
                                           migration,
                                           migration.source_compute)

nova_queens/nova/compute/manager.py

    @wrap_exception()
    @wrap_instance_event(prefix='compute')
    @errors_out_migration
    @wrap_instance_fault
    def confirm_resize(self, context, instance, migration):
        """Confirm a migration/resize and delete the 'old' instance.

        This is called from the API and runs on the source host. Nothing
        needs to happen on the destination host at this point since the
        instance is already running there; this routine just cleans up the
        source host.

        Excerpt: `...` marks code the article elided.
        """
        @utils.synchronized(instance.uuid)
        def do_confirm_resize(context, instance, migration_id):
            LOG.debug("Going to confirm migration %s", migration_id,
                      instance=instance)
            try:
                # Re-read the migration record by id.
                migration = objects.Migration.get_by_id(
                    context.elevated(), migration_id)
            except exception.MigrationNotFound:
                LOG.error("Migration %s is not found during confirmation",
                          migration_id, instance=instance)
                return
            # Already confirmed: nothing to do.
            if migration.status == 'confirmed':
                LOG.info("Migration %s is already confirmed",
                         migration_id, instance=instance)
                return
            # Any status other than 'finished' or 'confirming' is
            # unexpected; bail out.
            elif migration.status not in ('finished', 'confirming'):
                LOG.warning("Unexpected confirmation status '%(status)s' "
                            "of migration %(id)s, exit confirmation process",
                            {"status": migration.status, "id": migration_id},
                            instance=instance)
                return
            # Re-read the instance; if it has been deleted there is nothing
            # to do and we return here.
            expected_attrs = ['metadata', 'system_metadata', 'flavor']
            try:
                # Look the instance up by uuid.
                instance = objects.Instance.get_by_uuid(
                    context, instance.uuid,
                    expected_attrs=expected_attrs)
            except exception.InstanceNotFound:
                LOG.info("Instance is not found during confirmation",
                         instance=instance)
                return
            ...
            # Delegate to _confirm_resize in this same file (the call's
            # arguments are elided in the excerpt).
            self._confirm_resize(...
        # Run the synchronized inner function once for this migration.
        do_confirm_resize(context, instance, migration.id)

同一个py文件

    def _confirm_resize(self, context, instance, migration=None):
        """Destroy the source instance.

        NOTE(review): ``migration`` defaults to None but is dereferenced
        unconditionally below, so callers are expected to pass it.
        """
        # Emit the resize.confirm start notifications.
        self._notify_about_instance_usage(context, instance,
                                          "resize.confirm.start")
        compute_utils.notify_about_instance_action(
            context, instance,
            self.host, action=fields.NotificationAction.RESIZE_CONFIRM,
            phase=fields.NotificationPhase.START)

        # Remember the old flavor for the claim drop below, then clear the
        # stashed old/new flavor and old_vm_state from the instance.
        old_instance_type = instance.old_flavor
        instance.old_flavor = None
        instance.new_flavor = None
        instance.system_metadata.pop('old_vm_state', None)
        instance.save()

        # Tear down networking on the source host.
        self.network_api.setup_networks_on_host(context, instance,
                                                migration.source_compute,
                                                teardown=True)
        # Current network info of the instance.
        network_info = self.network_api.get_instance_nw_info(context,
                                                             instance)

        # NOTE(adrianc): Populate old PCI device in VIF profile
        # to allow virt driver to properly unplug it from Hypervisor.
        pci_mapping = (instance.migration_context.
                       get_pci_mapping_for_migration(True))
        network_info = self._get_updated_nw_info_with_pci_mapping(
            network_info, pci_mapping)

        # NOTE(review): the original comment here mentions fetching BDMs and
        # passing them to the driver, but no BDM lookup is present.
        # Handled by confirm_migration in the virt driver.
        self.driver.confirm_migration(context, migration, instance,
                                      network_info)

        # Mark the migration as confirmed.
        migration.status = 'confirmed'
        with migration.obj_as_admin():
            migration.save()

        rt = self._get_resource_tracker()
        rt.drop_move_claim(context, instance, migration.source_node,
                           old_instance_type, prefix='old_')
        instance.drop_migration_context()

        # NOTE(mriedem): The old_vm_state could be STOPPED but the user
        # might have manually powered up the instance to confirm the
        # resize/migrate, so we need to check the current power state
        # on the instance and set the vm_state appropriately. We default
        # to ACTIVE because if the power state is not SHUTDOWN, we
        # assume _sync_instance_power_state will clean it up.
        p_state = instance.power_state
        vm_state = None
        if p_state == power_state.SHUTDOWN:
            vm_state = vm_states.STOPPED
            LOG.debug("Resized/migrated instance is powered off. "
                      "Setting vm_state to '%s'.", vm_state,
                      instance=instance)
        else:
            vm_state = vm_states.ACTIVE

        instance.vm_state = vm_state
        instance.task_state = None
        instance.save(expected_task_state=[None, task_states.DELETING,
                                           task_states.SOFT_DELETING])

        # Emit the resize.confirm end notifications.
        self._notify_about_instance_usage(
            context, instance, "resize.confirm.end",
            network_info=network_info)
        compute_utils.notify_about_instance_action(
            context, instance,
            self.host, action=fields.NotificationAction.RESIZE_CONFIRM,
            phase=fields.NotificationPhase.END)

confirm成功,调用 libvirt driver 清理源主机上该虚拟机的残留(_resize 目录、domain 定义、网络)

nova_queens/nova/virt/libvirt/driver.py

    def confirm_migration(self, context, migration, instance, network_info):
        """Confirm a resize, destroying the source VM."""
        # All the work is done by _cleanup_resize in this same file.
        self._cleanup_resize(context, instance, network_info)

同一个py文件

    def _cleanup_resize(self, context, instance, network_info):
        # Excerpt: `...` marks code the article elided.
        inst_base = libvirt_utils.get_instance_path(instance)
        target = inst_base + '_resize'
        if os.path.exists(target):
            # Remove the _resize directory; the command is attempted up to
            # 5 times so a transient failure does not abort the cleanup.
            utils.execute('rm', '-rf', target, delay_on_retry=True,
                          attempts=5)
        ...
        if instance.host != CONF.host:
            # The instance no longer lives on this host: undefine its
            # libvirt domain.
            self._undefine_domain(instance)
            # Clean up its VIFs and firewall filtering.
            self.unplug_vifs(instance, network_info)
            self.unfilter_instance(instance, network_info)

不执行 confirm 而选择回滚时,走 revert_resize 流程(与 confirm 一样要求虚拟机处于 RESIZED 状态)

nova_queens/nova/api/openstack/compute/servers.py

    @wsgi.response(202)
    @wsgi.expected_errors((400, 404, 409))
    @wsgi.action('revertResize')
    def _action_revert_resize(self, req, id, body):
        # Handles the 'revertResize' server action. Excerpt: the exception
        # handlers of the try block below are elided (`...`).
        context = req.environ['nova.context']
        context.can(server_policies.SERVERS % 'revert_resize')
        # Load the instance.
        instance = self._get_server(context, req, id)
        try:
            # Handled by revert_resize in nova/compute/api.py.
            self.compute_api.revert_resize(context, instance)
        ...

nova_queens/nova/compute/api.py

    @check_instance_lock
    @check_instance_cell
    @check_instance_state(vm_state=[vm_states.RESIZED])
    def revert_resize(self, context, instance):
        """Revert a resize, deleting the 'new' instance in the process.

        Excerpt: `...` marks code the article elided.
        """
        elevated = context.elevated()
        # Look up this instance's migration record in status 'finished'.
        migration = objects.Migration.get_by_instance_and_status(
            elevated, instance.uuid, 'finished')
        # Reverting goes back to the old flavor, which might exceed quota.
        self._check_quota_for_upsize(context, instance, instance.flavor,
                                     instance.old_flavor)
        ...
        # Set the instance task state to RESIZE_REVERTING.
        instance.task_state = task_states.RESIZE_REVERTING
        instance.save(expected_task_state=[None])
        # Set the migration status to 'reverting'.
        migration.status = 'reverting'
        migration.save()
        # Record the start of the revert-resize instance action.
        self._record_action_start(context, instance,
                                  instance_actions.REVERT_RESIZE)
        # Conductor updated RequestSpec.flavor to point at the new flavor
        # during the initial resize, so the RequestSpec must be pointed back
        # at the old flavor; otherwise later move operations through the
        # scheduler would use the wrong flavor.
        try:
            reqspec = objects.RequestSpec.get_by_instance_uuid(
                context, instance.uuid)
            reqspec.flavor = instance.old_flavor
            reqspec.save()
        # The handler for this try block is elided in the excerpt.
        ...
        # RPC to the destination compute host (revert_resize in
        # nova/compute/manager.py).
        self.compute_rpcapi.revert_resize(context, instance,
                                          migration,
                                          migration.dest_compute)

nova_queens/nova/compute/manager.py

    @wrap_exception()
    @reverts_task_state
    @wrap_instance_event(prefix='compute')
    @errors_out_migration
    @wrap_instance_fault
    def revert_resize(self, context, instance, migration):
        """Destroy the new instance on the destination machine.

        Reverts the resize changes and powers on the old instance on the
        source host.

        Excerpt: `...` marks code the article elided.
        """
        # A revert_resize is essentially a resize back to the old size, so a
        # usage event has to be sent here.
        compute_utils.notify_usage_exists(self.notifier, context, instance,
                                          current_period=True)
        with self._error_out_instance_on_exception(context, instance):
            # Tear down networking on the destination host.
            self.network_api.setup_networks_on_host(context, instance,
                                                    teardown=True)
            # Convert the migration object to a primitive for the network
            # API.
            migration_p = obj_base.obj_to_primitive(migration)
            # Start migrating the networking.
            self.network_api.migrate_instance_start(context,
                                                    instance,
                                                    migration_p)
            # Network and block device info of the instance.
            network_info = self.network_api.get_instance_nw_info(context,
                                                                 instance)
            bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
                context, instance.uuid)
            block_device_info = self._get_instance_block_device_info(
                context, instance, bdms=bdms)
            # Only destroy the disks when the instance storage is not shared
            # with the source host.
            destroy_disks = not self._is_instance_storage_shared(
                context, instance, host=migration.source_compute)
            # Destroy the new instance on the destination host.
            self.driver.destroy(context, instance, network_info,
                                block_device_info, destroy_disks)
            # Terminate the volume connections.
            self._terminate_volume_connections(context, instance, bdms)
            # Update the migration status.
            migration.status = 'reverted'
            ...
            # RPC back to the source host to finish the revert there
            # (finish_revert_resize in nova/compute/manager.py).
            self.compute_rpcapi.finish_revert_resize(
                context, instance,
                migration, migration.source_compute)

同一个py文件

    @wrap_exception()
    @reverts_task_state
    @wrap_instance_event(prefix='compute')
    @errors_out_migration
    @wrap_instance_fault
    def finish_revert_resize(self, context, instance, migration):
        """Finish the second half of reverting a resize on the source host.

        Brings the original source instance state back (active/shutoff) and
        reverts the resized attributes in the database.

        Excerpt: `...` marks code the article elided (for example where
        ``bdms`` is defined).
        """
        ...
        # Emit the resize.revert start notifications.
        self._notify_about_instance_usage(
            context, instance, "resize.revert.start")
        compute_utils.notify_about_instance_action(
            context, instance,
            self.host, action=fields.NotificationAction.RESIZE_REVERT,
            phase=fields.NotificationPhase.START, bdms=bdms)
        # Roll the instance record back to the old flavor and source
        # host/node; old_vm_state defaults to ACTIVE when it was not stashed.
        old_vm_state = instance.system_metadata.pop('old_vm_state',
                                                    vm_states.ACTIVE)
        self._set_instance_info(instance, instance.old_flavor)
        instance.old_flavor = None
        instance.new_flavor = None
        instance.host = migration.source_compute
        instance.node = migration.source_node
        instance.save()
        self._revert_allocation(context, instance, migration)
        # Set up networking on the source host.
        self.network_api.setup_networks_on_host(context, instance,
                                                migration.source_compute)
        migration_p = obj_base.obj_to_primitive(migration)
        ...
        # The primitive's dest_compute is overridden with the source host
        # before it is handed to migrate_instance_finish.
        migration_p['dest_compute'] = migration.source_compute
        # Finish migrating the instance networking.
        self.network_api.migrate_instance_finish(context,
                                                 instance,
                                                 migration_p)
        network_info = self.network_api.get_instance_nw_info(context,
                                                             instance)
        ...
        self._update_volume_attachments(context, instance, bdms)
        # Block device info with refreshed connection info.
        block_device_info = self._get_instance_block_device_info(
            context, instance, refresh_conn_info=True, bdms=bdms)
        power_on = old_vm_state != vm_states.STOPPED
        # Let the virt driver finish the revert (finish_revert_migration in
        # the libvirt driver.py).
        self.driver.finish_revert_migration(context, instance,
                                            network_info,
                                            block_device_info, power_on)
        instance.drop_migration_context()
        instance.launched_at = timeutils.utcnow()
        # Persist the instance, expecting it to still be RESIZE_REVERTING.
        instance.save(expected_task_state=task_states.RESIZE_REVERTING)
        self._complete_volume_attachments(context, bdms)
        LOG.info("Updating instance to original state: '%s'",
                 old_vm_state, instance=instance)
        # Restore the original running state.
        if power_on:
            instance.vm_state = vm_states.ACTIVE
            instance.task_state = None
            instance.save()
        else:
            instance.task_state = task_states.POWERING_OFF
            instance.save()
            self.stop_instance(context, instance=instance,
                               clean_shutdown=True)
        # Emit the resize.revert end notifications.
        self._notify_about_instance_usage(
            context, instance, "resize.revert.end")
        compute_utils.notify_about_instance_action(
            context, instance,
            self.host, action=fields.NotificationAction.RESIZE_REVERT,
            phase=fields.NotificationPhase.END, bdms=bdms)

向libvirt发出命令,根据xml启动虚拟机

nova_queens/nova/virt/libvirt/driver.py

    def finish_revert_migration(self, context, instance, network_info,
                                block_device_info=None, power_on=True):
        """Restore the guest on this host after a reverted resize.

        Moves the ``<dir>_resize`` backup directory back into place if it
        exists, rolls the root disk back to the resize snapshot, then
        regenerates the domain XML and recreates the guest.
        """
        LOG.debug("开始finish_revert_migration",
                  instance=instance)
        # Instance path and its _resize backup path.
        inst_base = libvirt_utils.get_instance_path(instance)
        inst_base_resize = inst_base + "_resize"
        # If we are recovering from a failed migration, make sure no
        # left-over same-host base directory conflicts: when the _resize
        # backup exists, run _cleanup_failed_migration on inst_base and move
        # the backup back into place.
        if os.path.exists(inst_base_resize):
            self._cleanup_failed_migration(inst_base)
            utils.execute('mv', inst_base_resize, inst_base)
        # Snapshot handling: roll the root disk back to the resize snapshot
        # and always remove that snapshot afterwards.
        root_disk = self.image_backend.by_name(instance, 'disk')
        if root_disk.exists():
            try:
                root_disk.rollback_to_snap(libvirt_utils.RESIZE_SNAPSHOT_NAME)
            except exception.SnapshotNotFound:
                LOG.warning("Failed to rollback snapshot (%s)",
                            libvirt_utils.RESIZE_SNAPSHOT_NAME)
            finally:
                root_disk.remove_snap(libvirt_utils.RESIZE_SNAPSHOT_NAME,
                                      ignore_errors=True)
        # Disk layout for the guest.
        disk_info = blockinfo.get_disk_info(CONF.libvirt.virt_type,
                                            instance,
                                            instance.image_meta,
                                            block_device_info)
        # Generate the domain XML from the above and create the guest.
        xml = self._get_guest_xml(context, instance, network_info, disk_info,
                                  instance.image_meta,
                                  block_device_info=block_device_info)
        self._create_domain_and_network(context, xml, instance, network_info,
                                        block_device_info=block_device_info,
                                        power_on=power_on,
                                        vifs_already_plugged=True)
        # When powering on, wait until the guest is running.
        if power_on:
            timer = loopingcall.FixedIntervalLoopingCall(
                self._wait_for_running,
                instance)
            timer.start(interval=0.5).wait()
        LOG.debug("finish_revert_migration finished successfully.",
                  instance=instance)

冷迁移/Resize相对详细的流程基本就是这样,但是在准备时期具体的quota处理、回滚时期资源、quota的细节上还没有深入。
如果要做增强开发,或者增加热规格调整这样的新特性的话,这几点基本是避不开的。
但是楼主这个懒货还要睡懒觉、做好吃的、喂猫猫、打游戏,看NASA新出的片,估计要到猴年马月了

参考文献

https://www.codetd.com/article/9445368
https://blog.csdn.net/tantexian/article/details/41444461
https://blog.csdn.net/tantexian/article/details/41519135
https://blog.csdn.net/tantexian/article/details/41519179

Published by

风君子

独自遨游何稽首 揭天掀地慰生平