nova创建虚拟机时资源的检测

2019-11-08 00:28:56

字体：大中小

来源：转载

供稿：网友

在通过/nova/compute/manager.py:ComputeManager中的_build_and_run_instance来创建虚拟机的时候，会通过claim机制来检查当前的资源是否足够创建虚拟机 def _build_and_run_instance(self, context, instance, image, injected_files, admin_password, requested_networks, security_groups, block_device_mapping, node, limits, filter_properties): image_name = image.get('name') self._notify_about_instance_usage(context, instance, 'create.start', extra_usage_info={'image_name': image_name}) self._check_device_tagging(requested_networks, block_device_mapping) try: rt = self._get_resource_tracker(node) with rt.instance_claim(context, instance, limits): 可以看到这里先通过_get_resource_tracker 得到rt对象 def _get_resource_tracker(self, nodename): rt = self._resource_tracker_dict.get(nodename) if not rt: if not self.driver.node_is_available(nodename): raise exception.NovaException( _("%s is not a valid node managed by this " "compute host.") % nodename) rt = resource_tracker.ResourceTracker(self.host, self.driver, nodename) self._resource_tracker_dict[nodename] = rt return rt _get_resource_tracker 首先检查_resource_tracker_dict是否已经包含这个rt了，没有的话，就通过resource_tracker.ResourceTracker 来创建这个class的实例。其中ResourceTracker定义在compute/resource_tracker.py 中，其初始化只是简单的赋值。最后将新创建的rt加到_resource_tracker_dict中。继续看instance_claim def instance_claim(self, context, instance, limits=None): """Indicate that some resources are needed for an upcoming compute instance build operation. This should be called before the compute node is about to perform an instance build operation that will consume additional resources. :param context: security context :param instance: instance to reserve resources for. :type instance: nova.objects.instance.Instance object :param limits: Dict of oversubscription limits for memory, disk, and CPUs. :returns: A Claim ticket representing the reserved resources. It can be used to revert the resource usage if an error occurs during the instance build. 
""" if self.disabled: # compute_driver doesn't support resource tracking, just # set the 'host' and node fields and continue the build: self._set_instance_host_and_node(instance) return claims.NopClaim() # sanity checks: if instance.host: LOG.warning(_LW("Host field should not be set on the instance " "until resources have been claimed."), instance=instance) if instance.node: LOG.warning(_LW("Node field should not be set on the instance " "until resources have been claimed."), instance=instance) # get the overhead required to build this instance: overhead = self.driver.estimate_instance_overhead(instance) LOG.debug("Memory overhead for %(flavor)d MB instance; %(overhead)d " "MB", {'flavor': instance.flavor.memory_mb, 'overhead': overhead['memory_mb']}) LOG.debug("Disk overhead for %(flavor)d GB instance; %(overhead)d " "GB", {'flavor': instance.flavor.root_gb, 'overhead': overhead.get('disk_gb', 0)}) pci_requests = objects.InstancePCIRequests.get_by_instance_uuid( context, instance.uuid) claim = claims.Claim(context, instance, self, self.compute_node, pci_requests, overhead=overhead, limits=limits) # self._set_instance_host_and_node() will save instance to the DB # so set instance.numa_topology first. We need to make sure # that numa_topology is saved while under COMPUTE_RESOURCE_SEMAPHORE # so that the resource audit knows about any cpus we've pinned. instance_numa_topology = claim.claimed_numa_topology instance.numa_topology = instance_numa_topology self._set_instance_host_and_node(instance) if self.pci_tracker: # NOTE(jaypipes): ComputeNode.pci_device_pools is set below # in _update_usage_from_instance(). 
self.pci_tracker.claim_instance(context, pci_requests, instance_numa_topology) # Mark resources in-use and update stats self._update_usage_from_instance(context, instance) elevated = context.elevated() # persist changes to the compute node: self._update(elevated) return claim在instance_claim 中重点是申明了claim = claims.Claim(context, instance, self, self.compute_node, pci_requests, overhead=overhead, limits=limits)我们继续看看class Claim(NopClaim): """A declaration that a compute host operation will require free resources. Claims serve as marker objects that resources are being held until the update_available_resource audit process runs to do a full reconciliation of resource usage. This information will be used to help keep the local compute hosts's ComputeNode model in sync to aid the scheduler in making efficient / more correct decisions with respect to host selection. """ def __init__(self, context, instance, tracker, resources, pci_requests, overhead=None, limits=None): super(Claim, self).__init__() # Stash a copy of the instance at the current point of time self.instance = instance.obj_clone() self._numa_topology_loaded = False self.tracker = tracker self._pci_requests = pci_requests if not overhead: overhead = {'memory_mb': 0, 'disk_gb': 0} self.overhead = overhead self.context = context # Check claim at constructor to avoid mess code # Raise exception ComputeResourcesUnavailable if claim failed self._claim_test(resources, limits)初始化一些变量，最重要的就是调用_claim_test def _claim_test(self, resources, limits=None): """Test if this claim can be satisfied given available resources and optional oversubscription limits This should be called before the compute node actually consumes the resources required to execute the claim. :param resources: available local compute node resources :returns: Return true if resources are available to claim. 
""" if not limits: limits = {} # If an individual limit is None, the resource will be considered # unlimited: memory_mb_limit = limits.get('memory_mb') disk_gb_limit = limits.get('disk_gb') vcpus_limit = limits.get('vcpu') numa_topology_limit = limits.get('numa_topology') LOG.info(_LI("Attempting claim: memory %(memory_mb)d MB, " "disk %(disk_gb)d GB, vcpus %(vcpus)d CPU"), {'memory_mb': self.memory_mb, 'disk_gb': self.disk_gb, 'vcpus': self.vcpus}, instance=self.instance) reasons = [self._test_memory(resources, memory_mb_limit), self._test_disk(resources, disk_gb_limit), self._test_vcpus(resources, vcpus_limit), self._test_numa_topology(resources, numa_topology_limit), self._test_pci()] reasons = [r for r in reasons if r is not None] if len(reasons) > 0: raise exception.ComputeResourcesUnavailable(reason= "; ".join(reasons)) LOG.info(_LI('Claim successful'), instance=self.instance)在_claim_test 中我们可以看到创建一个虚拟机主要check 四个方面的资源，分别是memory/disk/vcpu/numa。针对这四中资源分别调用self._test_memory/self._test_disk/self._test_vcpus/self._test_numa_topology/self._test_pci() 来check def _test_memory(self, resources, limit): type_ = _("memory") unit = "MB" total = resources.memory_mb used = resources.memory_mb_used requested = self.memory_mb return self._test(type_, unit, total, used, requested, limit) def _test(self, type_, unit, total, used, requested, limit): """Test if the given type of resource needed for a claim can be safely allocated. 
""" LOG.info(_LI('Total %(type)s: %(total)d %(unit)s, used: %(used).02f ' '%(unit)s'), {'type': type_, 'total': total, 'unit': unit, 'used': used}, instance=self.instance) if limit is None: # treat resource as unlimited: LOG.info(_LI('%(type)s limit not specified, defaulting to ' 'unlimited'), {'type': type_}, instance=self.instance) return free = limit - used # Oversubscribed resource policy info: LOG.info(_LI('%(type)s limit: %(limit).02f %(unit)s, ' 'free: %(free).02f %(unit)s'), {'type': type_, 'limit': limit, 'free': free, 'unit': unit}, instance=self.instance) if requested > free: return (_('Free %(type)s %(free).02f ' '%(unit)s < requested %(requested)d %(unit)s') % {'type': type_, 'free': free, 'unit': unit, 'requested': requested})原来memory的check 就是用total -used 是否大于request的。可见仅仅是比较大小而已 def _test_disk(self, resources, limit): type_ = _("disk") unit = "GB" total = resources.local_gb used = resources.local_gb_used requested = self.disk_gb return self._test(type_, unit, total, used, requested, limit) def _test_vcpus(self, resources, limit): type_ = _("vcpu") unit = "VCPU" total = resources.vcpus used = resources.vcpus_used requested = self.vcpus return self._test(type_, unit, total, used, requested, limit)disk和cpu也是一样的算法。即看剩下的是否满足要求的。 def _test_pci(self): pci_requests = self._pci_requests if pci_requests.requests: stats = self.tracker.pci_tracker.stats if not stats.support_requests(pci_requests.requests): return _('Claim pci failed.')而pci的检测主要是看当前的tracker.pci_tracker.stats.support_requests 是否包含pci_requests.requests，并没有数量的比较

上一篇：DownloadManager的使用

下一篇：setDestinationInExternalPublicDir方法