该图是 Kubernetes 的总体架构图,涉及到 Kubernetes 中的一些重要模块。除了 kubernetes api server 之外,其他的模块都要和 api server 进行通讯来获取需要的资源,原理是 list、watch 机制。当用户创建 pod 资源后,此时 pod 中的 nodeName 属性值是空的,scheduler 模块会获取到这样的 pod 并为其选择合适的运行节点。scheduler 为 pod 选择合适的运行节点是一个很复杂的过程,要考虑很多因素,比如 zone、node 的 affinity、anti-affinity,主机的 cpu、内存、卷冲突、taint 等。当为 pod 选出合适的运行主机后,会将主机名设置给 pod 作为一个属性,存储到持久化存储也就是 etcd 中;kubelet 模块监听 nodeName 属性有值的 pod,获取到后在当前主机上运行 pod。
这是官方的一张框架图,展示的是最新版本中实现的选择 node 的流程。在之前的版本中,自定义插件存在很多痛点:插件是基于 predicate、Prioritize 的方式进行注册的;改成 framework 后的注册方式更加清晰,而且扩展性更好,这些会在 kubernetes extension 中详细说明。图中每个位置都是一个可扩展的点。
基于 predicate、Prioritize 的注册方式
// NewLegacyRegistry returns a legacy algorithm registry of predicates and priorities. func NewLegacyRegistry() *LegacyRegistry { registry := &LegacyRegistry{ // MandatoryPredicates the set of keys for predicates that the scheduler will // be configured with all the time. MandatoryPredicates: sets.NewString( PodToleratesNodeTaintsPred, CheckNodeUnschedulablePred, ), // Used as the default set of predicates if Policy was specified, but predicates was nil. DefaultPredicates: sets.NewString( NoVolumeZoneConflictPred, MaxEBSVolumeCountPred, MaxGCEPDVolumeCountPred, MaxAzureDiskVolumeCountPred, MaxCSIVolumeCountPred, MatchInterPodAffinityPred, NoDiskConflictPred, GeneralPred, PodToleratesNodeTaintsPred, CheckVolumeBindingPred, CheckNodeUnschedulablePred, ), // Used as the default set of predicates if Policy was specified, but priorities was nil. DefaultPriorities: map[string]int64{ SelectorSpreadPriority: 1, InterPodAffinityPriority: 1, LeastRequestedPriority: 1, BalancedResourceAllocation: 1, NodePreferAvoidPodsPriority: 10000, NodeAffinityPriority: 1, TaintTolerationPriority: 1, ImageLocalityPriority: 1, }, PredicateToConfigProducer: make(map[string]ConfigProducer), PriorityToConfigProducer: make(map[string]ConfigProducer), } registry.registerPredicateConfigProducer(GeneralPred, func(args ConfigProducerArgs) (plugins config.Plugins, pluginConfig []config.PluginConfig) { // GeneralPredicate is a combination of predicates. 
plugins.Filter = appendToPluginSet(plugins.Filter, noderesources.FitName, nil) plugins.PreFilter = appendToPluginSet(plugins.PreFilter, noderesources.FitName, nil) if args.NodeResourcesFitArgs != nil { pluginConfig = append(pluginConfig, NewPluginConfig(noderesources.FitName, args.NodeResourcesFitArgs)) } plugins.Filter = appendToPluginSet(plugins.Filter, nodename.Name, nil) plugins.Filter = appendToPluginSet(plugins.Filter, nodeports.Name, nil) plugins.PreFilter = appendToPluginSet(plugins.PreFilter, nodeports.Name, nil) plugins.Filter = appendToPluginSet(plugins.Filter, nodeaffinity.Name, nil) return }) registry.registerPredicateConfigProducer(PodToleratesNodeTaintsPred, func(_ ConfigProducerArgs) (plugins config.Plugins, pluginConfig []config.PluginConfig) { plugins.Filter = appendToPluginSet(plugins.Filter, tainttoleration.Name, nil) return }) registry.registerPredicateConfigProducer(PodFitsResourcesPred, func(args ConfigProducerArgs) (plugins config.Plugins, pluginConfig []config.PluginConfig) { plugins.Filter = appendToPluginSet(plugins.Filter, noderesources.FitName, nil) plugins.PreFilter = appendToPluginSet(plugins.PreFilter, noderesources.FitName, nil) if args.NodeResourcesFitArgs != nil { pluginConfig = append(pluginConfig, NewPluginConfig(noderesources.FitName, args.NodeResourcesFitArgs)) } return }) registry.registerPredicateConfigProducer(HostNamePred, func(_ ConfigProducerArgs) (plugins config.Plugins, pluginConfig []config.PluginConfig) { plugins.Filter = appendToPluginSet(plugins.Filter, nodename.Name, nil) return }) registry.registerPredicateConfigProducer(PodFitsHostPortsPred, func(_ ConfigProducerArgs) (plugins config.Plugins, pluginConfig []config.PluginConfig) { plugins.Filter = appendToPluginSet(plugins.Filter, nodeports.Name, nil) plugins.PreFilter = appendToPluginSet(plugins.PreFilter, nodeports.Name, nil) return }) registry.registerPredicateConfigProducer(MatchNodeSelectorPred, func(_ ConfigProducerArgs) (plugins config.Plugins, 
pluginConfig []config.PluginConfig) { plugins.Filter = appendToPluginSet(plugins.Filter, nodeaffinity.Name, nil) return }) registry.registerPredicateConfigProducer(CheckNodeUnschedulablePred, func(args ConfigProducerArgs) (plugins config.Plugins, pluginConfig []config.PluginConfig) { plugins.Filter = appendToPluginSet(plugins.Filter, nodeunschedulable.Name, nil) return }) registry.registerPredicateConfigProducer(CheckVolumeBindingPred, func(args ConfigProducerArgs) (plugins config.Plugins, pluginConfig []config.PluginConfig) { plugins.Filter = appendToPluginSet(plugins.Filter, volumebinding.Name, nil) return }) registry.registerPredicateConfigProducer(NoDiskConflictPred, func(_ ConfigProducerArgs) (plugins config.Plugins, pluginConfig []config.PluginConfig) { plugins.Filter = appendToPluginSet(plugins.Filter, volumerestrictions.Name, nil) return }) registry.registerPredicateConfigProducer(NoVolumeZoneConflictPred, func(_ ConfigProducerArgs) (plugins config.Plugins, pluginConfig []config.PluginConfig) { plugins.Filter = appendToPluginSet(plugins.Filter, volumezone.Name, nil) return }) registry.registerPredicateConfigProducer(MaxCSIVolumeCountPred, func(_ ConfigProducerArgs) (plugins config.Plugins, pluginConfig []config.PluginConfig) { plugins.Filter = appendToPluginSet(plugins.Filter, nodevolumelimits.CSIName, nil) return }) registry.registerPredicateConfigProducer(MaxEBSVolumeCountPred, func(_ ConfigProducerArgs) (plugins config.Plugins, pluginConfig []config.PluginConfig) { plugins.Filter = appendToPluginSet(plugins.Filter, nodevolumelimits.EBSName, nil) return }) registry.registerPredicateConfigProducer(MaxGCEPDVolumeCountPred, func(_ ConfigProducerArgs) (plugins config.Plugins, pluginConfig []config.PluginConfig) { plugins.Filter = appendToPluginSet(plugins.Filter, nodevolumelimits.GCEPDName, nil) return }) registry.registerPredicateConfigProducer(MaxAzureDiskVolumeCountPred, func(_ ConfigProducerArgs) (plugins config.Plugins, pluginConfig 
[]config.PluginConfig) { plugins.Filter = appendToPluginSet(plugins.Filter, nodevolumelimits.AzureDiskName, nil) return }) registry.registerPredicateConfigProducer(MaxCinderVolumeCountPred, func(_ ConfigProducerArgs) (plugins config.Plugins, pluginConfig []config.PluginConfig) { plugins.Filter = appendToPluginSet(plugins.Filter, nodevolumelimits.CinderName, nil) return }) registry.registerPredicateConfigProducer(MatchInterPodAffinityPred, func(_ ConfigProducerArgs) (plugins config.Plugins, pluginConfig []config.PluginConfig) { plugins.Filter = appendToPluginSet(plugins.Filter, interpodaffinity.Name, nil) plugins.PreFilter = appendToPluginSet(plugins.PreFilter, interpodaffinity.Name, nil) return }) registry.registerPredicateConfigProducer(CheckNodeLabelPresencePred, func(args ConfigProducerArgs) (plugins config.Plugins, pluginConfig []config.PluginConfig) { plugins.Filter = appendToPluginSet(plugins.Filter, nodelabel.Name, nil) if args.NodeLabelArgs != nil { pluginConfig = append(pluginConfig, NewPluginConfig(nodelabel.Name, args.NodeLabelArgs)) } return }) registry.registerPredicateConfigProducer(CheckServiceAffinityPred, func(args ConfigProducerArgs) (plugins config.Plugins, pluginConfig []config.PluginConfig) { plugins.Filter = appendToPluginSet(plugins.Filter, serviceaffinity.Name, nil) if args.ServiceAffinityArgs != nil { pluginConfig = append(pluginConfig, NewPluginConfig(serviceaffinity.Name, args.ServiceAffinityArgs)) } plugins.PreFilter = appendToPluginSet(plugins.PreFilter, serviceaffinity.Name, nil) return }) // Register Priorities. 
registry.registerPriorityConfigProducer(SelectorSpreadPriority, func(args ConfigProducerArgs) (plugins config.Plugins, pluginConfig []config.PluginConfig) { plugins.Score = appendToPluginSet(plugins.Score, defaultpodtopologyspread.Name, &args.Weight) plugins.PreScore = appendToPluginSet(plugins.PreScore, defaultpodtopologyspread.Name, nil) return }) registry.registerPriorityConfigProducer(TaintTolerationPriority, func(args ConfigProducerArgs) (plugins config.Plugins, pluginConfig []config.PluginConfig) { plugins.PreScore = appendToPluginSet(plugins.PreScore, tainttoleration.Name, nil) plugins.Score = appendToPluginSet(plugins.Score, tainttoleration.Name, &args.Weight) return }) registry.registerPriorityConfigProducer(NodeAffinityPriority, func(args ConfigProducerArgs) (plugins config.Plugins, pluginConfig []config.PluginConfig) { plugins.Score = appendToPluginSet(plugins.Score, nodeaffinity.Name, &args.Weight) return }) registry.registerPriorityConfigProducer(ImageLocalityPriority, func(args ConfigProducerArgs) (plugins config.Plugins, pluginConfig []config.PluginConfig) { plugins.Score = appendToPluginSet(plugins.Score, imagelocality.Name, &args.Weight) return }) registry.registerPriorityConfigProducer(InterPodAffinityPriority, func(args ConfigProducerArgs) (plugins config.Plugins, pluginConfig []config.PluginConfig) { plugins.PreScore = appendToPluginSet(plugins.PreScore, interpodaffinity.Name, nil) plugins.Score = appendToPluginSet(plugins.Score, interpodaffinity.Name, &args.Weight) if args.InterPodAffinityArgs != nil { pluginConfig = append(pluginConfig, NewPluginConfig(interpodaffinity.Name, args.InterPodAffinityArgs)) } return }) registry.registerPriorityConfigProducer(NodePreferAvoidPodsPriority, func(args ConfigProducerArgs) (plugins config.Plugins, pluginConfig []config.PluginConfig) { plugins.Score = appendToPluginSet(plugins.Score, nodepreferavoidpods.Name, &args.Weight) return }) registry.registerPriorityConfigProducer(MostRequestedPriority, func(args 
ConfigProducerArgs) (plugins config.Plugins, pluginConfig []config.PluginConfig) { plugins.Score = appendToPluginSet(plugins.Score, noderesources.MostAllocatedName, &args.Weight) return }) registry.registerPriorityConfigProducer(BalancedResourceAllocation, func(args ConfigProducerArgs) (plugins config.Plugins, pluginConfig []config.PluginConfig) { plugins.Score = appendToPluginSet(plugins.Score, noderesources.BalancedAllocationName, &args.Weight) return }) registry.registerPriorityConfigProducer(LeastRequestedPriority, func(args ConfigProducerArgs) (plugins config.Plugins, pluginConfig []config.PluginConfig) { plugins.Score = appendToPluginSet(plugins.Score, noderesources.LeastAllocatedName, &args.Weight) return }) registry.registerPriorityConfigProducer(noderesources.RequestedToCapacityRatioName, func(args ConfigProducerArgs) (plugins config.Plugins, pluginConfig []config.PluginConfig) { plugins.Score = appendToPluginSet(plugins.Score, noderesources.RequestedToCapacityRatioName, &args.Weight) if args.RequestedToCapacityRatioArgs != nil { pluginConfig = append(pluginConfig, NewPluginConfig(noderesources.RequestedToCapacityRatioName, args.RequestedToCapacityRatioArgs)) } return }) registry.registerPriorityConfigProducer(nodelabel.Name, func(args ConfigProducerArgs) (plugins config.Plugins, pluginConfig []config.PluginConfig) { // If there are n LabelPreference priorities in the policy, the weight for the corresponding // score plugin is n*weight (note that the validation logic verifies that all LabelPreference // priorities specified in Policy have the same weight). 
weight := args.Weight * int32(len(args.NodeLabelArgs.PresentLabelsPreference)+len(args.NodeLabelArgs.AbsentLabelsPreference)) plugins.Score = appendToPluginSet(plugins.Score, nodelabel.Name, &weight) if args.NodeLabelArgs != nil { pluginConfig = append(pluginConfig, NewPluginConfig(nodelabel.Name, args.NodeLabelArgs)) } return }) registry.registerPriorityConfigProducer(serviceaffinity.Name, func(args ConfigProducerArgs) (plugins config.Plugins, pluginConfig []config.PluginConfig) { // If there are n ServiceAffinity priorities in the policy, the weight for the corresponding // score plugin is n*weight (note that the validation logic verifies that all ServiceAffinity // priorities specified in Policy have the same weight). weight := args.Weight * int32(len(args.ServiceAffinityArgs.AntiAffinityLabelsPreference)) plugins.Score = appendToPluginSet(plugins.Score, serviceaffinity.Name, &weight) if args.ServiceAffinityArgs != nil { pluginConfig = append(pluginConfig, NewPluginConfig(serviceaffinity.Name, args.ServiceAffinityArgs)) } return }) // The following two features are the last ones to be supported as predicate/priority. // Once they graduate to GA, there will be no more checking for featue gates here. 
// Only register EvenPodsSpread predicate & priority if the feature is enabled if utilfeature.DefaultFeatureGate.Enabled(features.EvenPodsSpread) { klog.Infof("Registering EvenPodsSpread predicate and priority function") registry.registerPredicateConfigProducer(EvenPodsSpreadPred, func(_ ConfigProducerArgs) (plugins config.Plugins, pluginConfig []config.PluginConfig) { plugins.PreFilter = appendToPluginSet(plugins.PreFilter, podtopologyspread.Name, nil) plugins.Filter = appendToPluginSet(plugins.Filter, podtopologyspread.Name, nil) return }) registry.DefaultPredicates.Insert(EvenPodsSpreadPred) registry.registerPriorityConfigProducer(EvenPodsSpreadPriority, func(args ConfigProducerArgs) (plugins config.Plugins, pluginConfig []config.PluginConfig) { plugins.PreScore = appendToPluginSet(plugins.PreScore, podtopologyspread.Name, nil) plugins.Score = appendToPluginSet(plugins.Score, podtopologyspread.Name, &args.Weight) return }) registry.DefaultPriorities[EvenPodsSpreadPriority] = 1 } // Prioritizes nodes that satisfy pod's resource limits if utilfeature.DefaultFeatureGate.Enabled(features.ResourceLimitsPriorityFunction) { klog.Infof("Registering resourcelimits priority function") registry.registerPriorityConfigProducer(ResourceLimitsPriority, func(args ConfigProducerArgs) (plugins config.Plugins, pluginConfig []config.PluginConfig) { plugins.PreScore = appendToPluginSet(plugins.PreScore, noderesources.ResourceLimitsName, nil) plugins.Score = appendToPluginSet(plugins.Score, noderesources.ResourceLimitsName, &args.Weight) return }) registry.DefaultPriorities[ResourceLimitsPriority] = 1 } return registry }
基于 framework 的注册方式
// ListAlgorithmProviders lists registered algorithm providers. func ListAlgorithmProviders() string { r := NewRegistry() var providers []string for k := range r { providers = append(providers, k) } sort.Strings(providers) return strings.Join(providers, " | ") } func getDefaultConfig() *schedulerapi.Plugins { return &schedulerapi.Plugins{ QueueSort: &schedulerapi.PluginSet{ Enabled: []schedulerapi.Plugin{ {Name: queuesort.Name}, }, }, PreFilter: &schedulerapi.PluginSet{ Enabled: []schedulerapi.Plugin{ {Name: noderesources.FitName}, {Name: nodeports.Name}, {Name: interpodaffinity.Name}, }, }, Filter: &schedulerapi.PluginSet{ Enabled: []schedulerapi.Plugin{ {Name: nodeunschedulable.Name}, {Name: noderesources.FitName}, {Name: nodename.Name}, {Name: nodeports.Name}, {Name: nodeaffinity.Name}, {Name: volumerestrictions.Name}, {Name: tainttoleration.Name}, {Name: nodevolumelimits.EBSName}, {Name: nodevolumelimits.GCEPDName}, {Name: nodevolumelimits.CSIName}, {Name: nodevolumelimits.AzureDiskName}, {Name: volumebinding.Name}, {Name: volumezone.Name}, {Name: interpodaffinity.Name}, }, }, PreScore: &schedulerapi.PluginSet{ Enabled: []schedulerapi.Plugin{ {Name: interpodaffinity.Name}, {Name: defaultpodtopologyspread.Name}, {Name: tainttoleration.Name}, }, }, Score: &schedulerapi.PluginSet{ Enabled: []schedulerapi.Plugin{ {Name: noderesources.BalancedAllocationName, Weight: 1}, {Name: imagelocality.Name, Weight: 1}, {Name: interpodaffinity.Name, Weight: 1}, {Name: noderesources.LeastAllocatedName, Weight: 1}, {Name: nodeaffinity.Name, Weight: 1}, {Name: nodepreferavoidpods.Name, Weight: 10000}, {Name: defaultpodtopologyspread.Name, Weight: 1}, {Name: tainttoleration.Name, Weight: 1}, }, }, Bind: &schedulerapi.PluginSet{ Enabled: []schedulerapi.Plugin{ {Name: defaultbinder.Name}, }, }, } }
scheduler 模块中原生的插件只是最基本的,在使用过程中必定需要执行很多适合自身业务的插件,新的 framework 框架为用户提供了以下几种扩展实现方式