diff --git a/.gitignore b/.gitignore index 6cd4685..e86768d 100644 --- a/.gitignore +++ b/.gitignore @@ -10,4 +10,5 @@ adaptive_trans adaptive-transformers-in-rl-master.zip log MAPPO_MPE -run_multi_fn_dag.py \ No newline at end of file +run_multi_fn_dag.py +serverless_sim/cache/ \ No newline at end of file diff --git a/scripts/analyse_experiment_data.py b/scripts/analyse_experiment_data.py deleted file mode 100644 index 8fc14e5..0000000 --- a/scripts/analyse_experiment_data.py +++ /dev/null @@ -1,154 +0,0 @@ -# 自动运行同路径下的run_different_req_freq.py -import os -CUR_FPATH = os.path.abspath(__file__) -CUR_FDIR = os.path.dirname(CUR_FPATH) -# chdir to the directory of this script -os.chdir(CUR_FDIR) - -import json -import os -import subprocess -import time -import matplotlib.pyplot as plt -import numpy as np - -# json文件中各个字段的索引 -FRAME_IDX_FRAME = 0; # 帧数 -FRAME_IDX_RUNNING_REQS = 1; # 请求数量 -FRAME_IDX_NODES = 2; # 节点的状态:cpu、mem -FRAME_IDX_REQ_DONE_TIME_AVG = 3; # 请求的平均完成时间 -FRAME_IDX_REQ_DONE_TIME_STD = 4; # 请求的完成时间的标准差 -FRAME_IDX_REQ_DONE_TIME_AVG_90P = 5; # 请求的90%完成时间 -FRAME_IDX_COST = 6; # 成本 -FRAME_IDX_SCORE = 7; # 得分(强化学习用) -FRAME_IDX_DONE_REQ_COUNT = 8; # 已完成请求数量 -FRAME_IDX_REQ_WAIT_SCHE_TIME = 9; # 等待调度的时间 -FRAME_IDX_REQ_WAIT_COLDSTART_TIME = 10; # 冷启动的时间 -FRAME_IDX_REQ_DATA_RECV_TIME = 11; # 数据接收时间 -FRAME_IDX_REQ_EXE_TIME = 12; # 请求的执行时间 -FRAME_IDX_ALGO_EXE_TIME = 13; # 算法执行时间 -FRAME_IDX_FNCONTAINER_COUNT = 14; # 总的容器数量 - -""" -目前比较的指标有: -FRAME_IDX_REQ_DONE_TIME_AVG = 3; # 请求的平均完成时间 -FRAME_IDX_COST = 6; # 成本 -性价比: 1 / (FRAME_IDX_REQ_DONE_TIME_AVG * FRAME_IDX_COST) -FRAME_IDX_REQ_WAIT_COLDSTART_TIME = 10; # 冷启动的时间 -""" - -# 记录文件的路径,相对路径报错 -records_path = "..\\serverless_sim\\records" -script_path = ".\\run_different_req_freq.py" # py脚本的路径 -output_path = "实验结果\\算法延迟\\实验结果-10帧生成" - -# create outout dir -try: - os.makedirs(output_path) -except: - pass - -RUN_TIMES = 10 # 运行次数 -# 算法组合,key为算法名,value为数组,数组的元素为字典,key为参数名,value为参数值 -algos_metrics = {} # HashMap>> - -# 多次运行脚本以分析实验数据 -def run_script(): - for _ in range(RUN_TIMES): - # 使用subprocess.run来运行脚本,并等待其完成 - result = subprocess.run(['python', script_path], check=True) - # 检查运行结果,如果失败则抛出异常 - if result.returncode != 0: - raise Exception(f"脚本运行失败,返回码: {result.returncode}") - # 可以在这里添加等待时间,如果需要的话 - time.sleep(1) - - -# 根据执行后的json文件分析运行了哪些算法组合 -def analyze_which_algo(): - json_files = [f for f in os.listdir(records_path) if f.endswith('.json')] - algos = [] - for file in json_files: - # 用 . 分割文件名,取出算法名 - compete_name = file.split('.') - a = 1 - algo_name = compete_name[5][4:] + "." + compete_name[9][4:] + "." + compete_name[11][3:] - if algo_name not in algos: - algos.append(algo_name) - for algo in algos: - algos_metrics[algo] = [] - - -# 分析同一个算法运行 RUN_TIMES 次的实验结果折线图 -def analyze_same_algo_metrics_bytimes(): - json_files = [f for f in os.listdir(records_path) if f.endswith('.json')] - for file in json_files: - # 取出算法名 - compete_name = file.split('.') - algo_name = compete_name[5][4:] + "." + compete_name[9][4:] + "." 
+ compete_name[11][3:] - - with open(os.path.join(records_path, file), 'r') as f: - # print("fcontent: ", f.read()) - # 读取json数据 - record = json.load(f) - frames = record['frames'] - done_time = frames[len(frames) - 1][3] - cost = frames[len(frames) - 1][6] - efficency = 1 / (frames[len(frames) - 1][3] * frames[len(frames) - 1][6]) - cold_start_time = frames[len(frames) - 1][10] - algos_metrics[algo_name].append({'req_done_time_avg': done_time, 'cost': cost, 'efficency': efficency, 'cold_start_time': cold_start_time}) - - -# 分析不同算法运行 RUN_TIMES 次的平均指标柱状图 -def analyze_diff_algo_avg_metrics(): - metrics = ['req_done_time_avg', 'cost', 'efficency', 'cold_start_time'] - colors = ['b', 'g', 'r', 'c', 'm', 'y'] - - # 生成折线图 - for algo, data in algos_metrics.items(): - fig, axs = plt.subplots(2, 2, figsize=(15, 10)) - for idx, metric in enumerate(metrics): - ax = axs[idx // 2, idx % 2] - values = [d[metric] for d in data] - for i, entry in enumerate(data): - ax.plot(range(len(data)), values, color=colors[i % len(colors)]) - std_value = np.std(values) - ax.text(0.95, 0.95, f'STD: {std_value:.5f}', transform=ax.transAxes, fontsize=12, verticalalignment='top', horizontalalignment='right') - ax.set_title(f'{metric}') - ax.set_xlabel('TIMES') - ax.set_ylabel(metric) - ax.legend() - fig.suptitle(f'{algo} - Metrics') - plt.tight_layout(rect=[0, 0, 1, 0.96]) - plt.savefig(os.path.join(output_path, f"{algo}.png")) - - # 生成直方图 - avg_metrics = {algo: {metric: np.mean([entry[metric] for entry in data]) for metric in metrics} for algo, data in algos_metrics.items()} - - fig, axs = plt.subplots(2, 2, figsize=(15, 10)) - for idx, metric in enumerate(metrics): - ax = axs[idx // 2, idx % 2] - algo_names = list(avg_metrics.keys()) - values = [avg_metrics[algo][metric] for algo in algo_names] - bars = ax.bar(algo_names, values, color=colors[:len(algo_names)]) - ax.set_title(f'Average {metric} Comparison') - ax.set_xlabel('Algorithm') - ax.set_ylabel(metric) - ax.set_xticklabels(algo_names, rotation=45, ha='right') - # 在每个直方上方显示具体数值 - for bar, value in zip(bars, values): - ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height(), f'{value:.2f}', ha='center', va='bottom') - plt.tight_layout() - plt.savefig(os.path.join(output_path, "avg_comparison.png")) - - -if __name__ == "__main__": - # run_script() - - analyze_which_algo() - - - analyze_same_algo_metrics_bytimes() - - - analyze_diff_algo_avg_metrics() diff --git a/scripts/batch_run.yml b/scripts/batch_run.yml index f2c5e05..ee56c25 100644 --- a/scripts/batch_run.yml +++ b/scripts/batch_run.yml @@ -1,41 +1,44 @@ -run_time: 4 +run_time: 1 params: request_freq: - - low: + # - low: - middle: - - high: + # - high: dag_type: # - single: - # - mix: - - dag: + - mix: + # - dag: no_mech_latency: - true: # - false: mech_scale_sche: - scale_sche_joint: - scale_num: - # - hpa: - # - temp_scaler: - - ensure_scaler: - scale_down_exec: - - default: - scale_up_exec: - - least_task: - sche: - # - bp_balance: - # - pos: greedy - - ensure_scheduler: - filter: - # - [] - - [{'careful_down':''}] + # scale_sche_joint: + # scale_num: + # - hpa: + # - lass: + # # - temp_scaler: + # # - ensure_scaler: + # - Q_learning_hpa: + # scale_down_exec: + # - default: + # scale_up_exec: + # - least_task: + # sche: + # - bp_balance: + # # - pos: greedy + # # - ensure_scheduler: + # filter: + # # - [] + # - [{'careful_down':''}] scale_sche_separated: scale_num: - - temp_scaler: + # - temp_scaler: - hpa: - - lass: + # - Q_learning_hpa: + # - lass: scale_down_exec: - default: scale_up_exec: @@ -44,9 
+47,9 @@ mech_scale_sche: # - greedy: # - hash: # - random: - - load_least: - # - rotate: - - pass: + # - load_least: + - rotate: + # - pass: filter: # - [] - [{'careful_down':''}] @@ -63,7 +66,7 @@ mech_scale_sche: # # - hash: # # - random: # # - rotate: - # # - faasflow: + # - faasflow: # filter: # - [] diff --git a/scripts/draw.yaml b/scripts/draw.yaml deleted file mode 100644 index ed7558d..0000000 --- a/scripts/draw.yaml +++ /dev/null @@ -1,44 +0,0 @@ -## filter with fixed value -filter: - dag_type: single - cold_start: high - fn_type: cpu - scale_down_exec: default. - # request_freq: low - -## each group bars -targets_alias: -- [{scale_num: temp_scaler., scale_up_exec: least_task.,sche: pos.greedy, instance_cache_policy: no_evict.}, 'Temp_POSG_NoEvi'] -- [{scale_num: temp_scaler., scale_up_exec: least_task.,sche: pos.random, instance_cache_policy: no_evict.}, 'Temp_POSR_NoEvi'] -# - [{scale_num: temp_scaler., scale_up_exec: least_task.,sche: pos., instance_cache_policy: lru.10}, 'Temp_POS_LRU10'] -- [{scale_num: hpa., scale_up_exec: least_task.,sche: pos.greedy,instance_cache_policy: no_evict.}, 'HPA_POSG_NoEvi'] -- [{scale_num: hpa., scale_up_exec: least_task.,sche: pos.random,instance_cache_policy: no_evict.}, 'HPA_POSR_NoEvi'] -# - [{scale_num: hpa., scale_up_exec: least_task.,sche: pos., instance_cache_policy: lru.10}, 'HPA_POS_LRU10'] -# - [{scale_num: hpa., scale_up_exec: least_task.,sche: random.,instance_cache_policy: no_evict.}, 'HPA_Random_NoEvi'] -# - [{scale_num: hpa., scale_up_exec: least_task.,sche: random.,instance_cache_policy: lru.10}, 'HPA_Random_LRU10'] -- [{scale_num: hpa., scale_up_exec: least_task.,sche: greedy., instance_cache_policy: no_evict.}, 'HPA_Greedy_NoEvi'] -# - [{scale_num: hpa., scale_up_exec: least_task.,sche: greedy., instance_cache_policy: lru.10}, 'HPA_Greedy_LRU10'] -# - [{scale_num: hpa., scale_up_exec: least_task.,sche: bp_balance., instance_cache_policy: lru.10}, 'HPA_bp_balance_LRU10'] -# - [{scale_num: hpa., scale_up_exec: least_task.,sche: bp_balance., instance_cache_policy: no_evict.}, 'HPA_bp_balance_NoEvi'] - -# - [{scale_num: full_placement., scale_up_exec: least_task.,sche: pos., instance_cache_policy: lru.10}, 'FP_Pos_lru.10'] -# - [{scale_num: full_placement., scale_up_exec: least_task.,sche: pos., instance_cache_policy: no_evict.}, 'FP_Pos_NoEvi'] - -# - [{mechtype: scale_sche_joint,scale_num: hpa., scale_up_exec: least_task.,sche: pos.}, 'Joint_HPA_POS)'] -# - [{mechtype: scale_sche_joint,scale_num: temp., scale_up_exec: least_task.,sche: pos.}, 'Joint_Temp_POS'] - -# - [{mechtype: no_scaler,scale_num: 'no', scale_up_exec: 'no',sche: greedy}, 'NoScalerGreedy'] - -## group on x axis: -group: - by: request_freq - types: [low,middle,high] - alias: 'Request Frequency' - type_alias: ['Low','Middle','High'] - -## y axis -values: -# - {alias: Throughput, trans: throughput} -- {alias: Cost, trans: cost_per_req} -- {alias: Latency(ms), trans: '[waitsche_time_per_req,coldstart_time_per_req,datarecv_time_per_req,exe_time_per_req]'} # convert 10ms to ms -- {alias: Quality-Price Ratio, trans: 1/cost_per_req/time_per_req} \ No newline at end of file diff --git a/scripts/draw_bar.py b/scripts/draw_bar.py index d9adb6d..a83d18e 100644 --- a/scripts/draw_bar.py +++ b/scripts/draw_bar.py @@ -8,6 +8,8 @@ from pprint import pprint import yaml import re +import matplotlib +matplotlib.use('TkAgg') import matplotlib.pyplot as plt import numpy as np @@ -449,6 +451,7 @@ def adjust_brightness(hex_color, factor): + # plt.savefig('./ex_results/result.png', 
dpi=300, bbox_inches='tight') plt.show() diff --git a/scripts/draw_diff_load_on_1_figure.py b/scripts/draw_diff_load_on_1_figure.py new file mode 100644 index 0000000..967bcd7 --- /dev/null +++ b/scripts/draw_diff_load_on_1_figure.py @@ -0,0 +1,541 @@ +import os +CUR_FPATH = os.path.abspath(__file__) +CUR_FDIR = os.path.dirname(CUR_FPATH) +# chdir to the directory of this script +os.chdir(CUR_FDIR) + +import requests +from pprint import pprint +import yaml +import re +import matplotlib.pyplot as plt +import numpy as np + +### doc: https://fvd360f8oos.feishu.cn/docx/RMjfdhRutoDmOkx4f4Lcl1sjnzd + +# class PackedRecord: +# # configstr.clone().into(), +# # cost_per_req, +# # time_per_req, +# # score, +# # rps.into(), +# # f.time_str.clone().into() +# raw_record=[] + +# configstr="" +# cost_per_req=0.0 +# time_per_req=0.0 +# score=0.0 +# rps=0.0 +# coldstart_time_per_req=0.0 +# waitsche_time_per_req=0.0 +# datarecv_time_per_req=0.0 +# exe_time_per_req=0.0 + +# filename="" + +# rand_seed="" +# request_freq="" +# dag_type="" +# cold_start="" +# scale_num="" +# scale_down_exec="" +# scale_up_exec="" +# fn_type="" +# instance_cache_policy="" + + +# def __init__(self, raw_record): +# if len(raw_record) != 10: +# raise ValueError("The input list must contain exactly 10 elements.") +# self.configstr = raw_record[0] +# self.cost_per_req = raw_record[1] +# self.time_per_req = raw_record[2] +# self.score = raw_record[3] +# self.rps = raw_record[4] +# self.coldstart_time_per_req=raw_record[5] +# self.waitsche_time_per_req=raw_record[6] +# self.datarecv_time_per_req=raw_record[7] +# self.exe_time_per_req=raw_record[8] +# self.filename = raw_record[9] + + +# # compute sub values by config str +# self.parse_configstr() + +# def parse_configstr(self): +# config_patterns = [ +# (r'sd(\w+)\.rf', 'rand_seed'), +# (r'\.rf(\w+)\.', 'request_freq'), +# (r'\.dt(\w+)\.', 'dag_type'), +# (r'\.cs(\w+)\.', 'cold_start'), +# (r'\.ft(\w+)\.', 'fn_type'), +# (r'\.scl\(([^)]+)\)\(([^)]+)\)\(([^)]+)\)\.', 'scale_num', 'scale_down_exec', 'scale_up_exec'), +# (r'\.scd\(([^)]+)\)', 'sche'), +# (r'\.ic\(([^)]+)\)', 'instance_cache_policy') +# ] + +# for pattern, *keys in config_patterns: +# match = re.search(pattern, self.configstr) +# if match: +# values = match.groups() +# for key, value in zip(keys, values): +# setattr(self, key, value) +# self.print_attributes() + + +# def print_attributes(self): +# attributes = [ +# 'configstr', 'cost_per_req', 'time_per_req', 'score', 'rps', 'filename', +# 'rand_seed', 'request_freq', 'dag_type', 'cold_start', 'fn_type', +# 'scale_num', 'scale_down_exec', 'scale_up_exec', 'sche' +# ] +# for attr in attributes: +# print(f"{attr}={getattr(self, attr)}") + +import records_read +# { +# confstr: [files...] +# } +def get_record_filelist(drawconf): + conf_2_files=records_read.group_by_conf_files() + # filter out we dont care + new={} + for confstr in conf_2_files: + conf=records_read.FlattenConfig(confstr) + confjson=conf.json() + + nomatch_filter=False + + # check match draw filter + for drawfilter in drawconf['filter']: + if drawfilter in confjson: + if confjson[drawfilter]!=drawconf['filter'][drawfilter]: + # continue + nomatch_filter=True + break + + if nomatch_filter: + continue + + + nomatch_targets=True + # check match draw targets_alias + for target in drawconf['targets_alias']: + nomatch_target=False + for targetkey in target[0]: + if targetkey not in confjson: + print("!!! 
invalid target alias with key",targetkey) + exit(1) + if confjson[targetkey]!=target[0][targetkey]: + # continue + nomatch_target=True + break + if not nomatch_target: + nomatch_targets=False + break + # if invalid: + # continue + if nomatch_targets: + continue + new[confstr]=conf_2_files[confstr] + return new + +# no return +# panic if check failed +def check_first_draw_group_match_avg_cnt(drawconf,conf_2_files): + avg_cnt=drawconf['avg_cnt'] + if avg_cnt==0: + print("!!! avg_cnt should not be 0") + exit(1) + + first_group_k=drawconf['group']['by'] + first_group_v=drawconf['group']['types'][0] + conf_2_files_only_first_group={} + # filter + for confstr in conf_2_files: + conf=records_read.FlattenConfig(confstr) + if getattr(conf,first_group_k)==first_group_v: + conf_2_files_only_first_group[confstr]=conf_2_files[confstr] + + # all group files cnt >= avg_cnt + for confstr in conf_2_files_only_first_group: + if len(conf_2_files_only_first_group[confstr])") + exit(1) + + yamlfilepath=sys.argv[1] + + drawconf=yaml.safe_load(open(yamlfilepath, 'r', encoding='utf-8')) + + print("\n\n get_record_filelist") + conf_2_files=get_record_filelist(drawconf) + + print("\n\n check_first_draw_group_match_avg_cnt") + check_first_draw_group_match_avg_cnt(drawconf,conf_2_files) + + print("\n\n get_each_group_prev_avg_cnt_file__compute_avg") + records=get_each_group_prev_avg_cnt_file__compute_avg(drawconf,conf_2_files) + + print("\n\n flatten records") + records=[records[confstr] for confstr in records] + for record in records: + # record.print_attributes() + print(record.configstr) + # print([r.configstr for r in records]) + + print("\n\n group_records") + groups=group_records(records,drawconf) + + print("\n\n to_draw_meta") + drawmeta=to_draw_meta(groups,drawconf) + + print("\n\n") + pprint(drawmeta) + draw_with_draw_meta(drawmeta,drawconf) + # import matplotlib.pyplot as plt + # from collections import defaultdict + + + # groups = defaultdict(list) + # for record in records: + # key_parts = record[0].split(".") + # common_part = ".".join(key_parts[1:5]) + # algorithm = "".join(key_parts[5:len(key_parts) - 1]) + # algorithm = algorithm.split(")") + # algorithm = ")\n".join(algorithm) + # record[5] = algorithm + # groups[common_part].append(record) + + + # for group_name, group_records in groups.items(): + # data_points = { + # 'Cost': [row[1] for row in group_records], + # 'Latency': [row[2] for row in group_records], + # } + # costs = data_points['Cost'] + # latencies = data_points['Latency'] + # value_for_money = [(1 / latency) * 1 / cost if cost != 0 and latency != 0 else float('inf') for latency, cost in zip(latencies, costs)] # 防止除以零 + # data_points['Performance_Cost'] = value_for_money + + # x_ticks = [row[5] for row in group_records] + + # for key, values in data_points.items(): + # plt.figure() + # bars = plt.bar(range(len(values)), values) + # plt.title(f'Comparison of {key} in {group_name}') + # plt.xlabel('Experiment') + # plt.ylabel(key) + # plt.xticks(range(len(values)), x_ticks, fontsize = 9) + # plt.subplots_adjust(bottom = 0.21) + + # for bar in bars: + # height = bar.get_height() + # plt.text(bar.get_x() + bar.get_width() / 2, height, f'{height:.4f}', ha='center', va='bottom') + + # plt.show() + +pipeline() \ No newline at end of file diff --git a/scripts/draw_diff_load_on_1_figure.yaml b/scripts/draw_diff_load_on_1_figure.yaml new file mode 100644 index 0000000..b8e9a10 --- /dev/null +++ b/scripts/draw_diff_load_on_1_figure.yaml @@ -0,0 +1,94 @@ +# 如果需要一张图中画三种负载的实验结果,只需要改下面的 targets_alias 
块的内容, +# 与之前的一个负载单张图配置信息有所不同,注意自己看一下怎么写的 targets_alias 配置项 + +avg_cnt: 1 + +## filter with fixed value +filter: +# dag_type: single + cold_start: high +# fn_type: cpu +# scale_down_exec: default. +# # request_freq: low + +## each group bars +targets_alias: +# 整体方案绘图表 +# - [{'rand_seed': '', 'dag_type': 'mix', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'temp_scaler.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'bp_balance.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'Ours'] +# - [{'rand_seed': '', 'dag_type': 'mix', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'ensure_scaler.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'ensure_scheduler.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'ENSURE'] +# - [{'rand_seed': '', 'dag_type': 'mix', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'no.', 'scale_down_exec': 'default.', 'scale_up_exec': 'no.', 'sche': 'faasflow.', 'instance_cache_policy': 'no_evict.', 'filter': '', 'no_mech_latency': '1'}, 'faasflow'] +# - [{'rand_seed': '', 'dag_type': 'mix', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'lass.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'load_least.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'LaSS+LoadLeast'] +# - [{'rand_seed': '', 'dag_type': 'mix', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'hpa.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'load_least.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'HPA+LoadLeast'] +# - [{'rand_seed': '', 'dag_type': 'mix', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'hpa.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'pass.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'HPA+PASS'] +# - [{'rand_seed': '', 'dag_type': 'mix', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'Q_learning_hpa.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'load_least.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'RL-scaler+LoadLeast'] + + +# 扩缩容消融实验绘图表------LoadLeast +# - [{'rand_seed': '', 'dag_type': 'mix', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'temp_scaler.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'load_least.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'HPTD+LoadLeast'] +# - [{'rand_seed': '', 'dag_type': 'mix', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'hpa.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'load_least.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'HPA+LoadLeast'] +# - [{'rand_seed': '', 'dag_type': 'mix', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'lass.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'load_least.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'LaSS+LoadLeast'] +# - [{'rand_seed': '', 'dag_type': 'mix', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'Q_learning_hpa.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'load_least.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 
'no_mech_latency': '1'}, 'RL-scaler+LoadLeast'] + +# 扩缩容消融实验绘图表------bp_balance +# - [{'rand_seed': '', 'dag_type': 'mix', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'temp_scaler.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'bp_balance.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'HPTD+DLO'] +# - [{'rand_seed': '', 'dag_type': 'mix', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'lass.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'bp_balance.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'LaSS+DLO'] +# - [{'rand_seed': '', 'dag_type': 'mix', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'hpa.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'bp_balance.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'HPA+DLO'] +# - [{'rand_seed': '', 'dag_type': 'mix', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'Q_learning_hpa.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'bp_balance.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'RL-scaler+DLO'] + + +# # 调度消融实验------ +# - [{'rand_seed': '', 'dag_type': 'mix', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'hpa.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'bp_balance.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'HPA+DLO'] +# - [{'rand_seed': '', 'dag_type': 'mix', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'hpa.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'load_least.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'HPA+LoadLeast'] +# - [{'rand_seed': '', 'dag_type': 'mix', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'hpa.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'pass.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'HPA+PASS'] +# - [{'rand_seed': '', 'dag_type': 'mix', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'hpa.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'rotate.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'HPA+RoundRobin'] + + +# 蚂蚁实验---100 dag +# - [{'rand_seed': '', 'dag_type': 'dag', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'temp_scaler.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'bp_balance.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'temp+bp'] +# - [{'rand_seed': '', 'dag_type': 'dag', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'temp_scaler.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'pos.greedy', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'temp+pos'] +# - [{'rand_seed': '', 'dag_type': 'dag', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'no.', 'scale_down_exec': 'default.', 'scale_up_exec': 'no.', 'sche': 'hash.', 'instance_cache_policy': 'no_evict.', 'filter': '', 'no_mech_latency': '1'}, 'hash'] +# - [{'rand_seed': '', 'dag_type': 'dag', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'no.', 'scale_down_exec': 'default.', 'scale_up_exec': 'no.', 'sche': 'load_least.', 
'instance_cache_policy': 'no_evict.', 'filter': '', 'no_mech_latency': '1'}, 'load_least'] +# - [{'rand_seed': '', 'dag_type': 'dag', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'no.', 'scale_down_exec': 'default.', 'scale_up_exec': 'no.', 'sche': 'rotate.', 'instance_cache_policy': 'no_evict.', 'filter': '', 'no_mech_latency': '1'}, 'rotate'] +# - [{'rand_seed': '', 'dag_type': 'dag', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'hpa.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'hash.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'hpa+hash'] +# - [{'rand_seed': '', 'dag_type': 'dag', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'hpa.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'load_least.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'hpa+LoadLeast'] +# - [{'rand_seed': '', 'dag_type': 'dag', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'hpa.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'rotate.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'hpa+rotate'] + +# - [{'rand_seed': '', 'dag_type': 'mix', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'temp_scaler.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'pos.greedy', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'temp+pos'] +# - [{'rand_seed': '', 'dag_type': 'mix', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'no.', 'scale_down_exec': 'default.', 'scale_up_exec': 'no.', 'sche': 'hash.', 'instance_cache_policy': 'no_evict.', 'filter': '', 'no_mech_latency': '1'}, 'hash'] +# - [{'rand_seed': '', 'dag_type': 'mix', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'no.', 'scale_down_exec': 'default.', 'scale_up_exec': 'no.', 'sche': 'load_least.', 'instance_cache_policy': 'no_evict.', 'filter': '', 'no_mech_latency': '1'}, 'load_least'] +# - [{'rand_seed': '', 'dag_type': 'mix', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'no.', 'scale_down_exec': 'default.', 'scale_up_exec': 'no.', 'sche': 'rotate.', 'instance_cache_policy': 'no_evict.', 'filter': '', 'no_mech_latency': '1'}, 'rotate'] +# - [{'rand_seed': '', 'dag_type': 'mix', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'hpa.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'hash.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'hpa+hash'] +# - [{'rand_seed': '', 'dag_type': 'mix', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'hpa.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'load_least.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'hpa+LoadLeast'] +# - [{'rand_seed': '', 'dag_type': 'mix', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'hpa.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'rotate.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'hpa+rotate'] + +# - [{'rand_seed': '', 'dag_type': 'single', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'temp_scaler.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'pos.greedy', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'ours'] +# - [{'rand_seed': '', 'dag_type': 
'single', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'no.', 'scale_down_exec': 'default.', 'scale_up_exec': 'no.', 'sche': 'hash.', 'instance_cache_policy': 'no_evict.', 'filter': '', 'no_mech_latency': '1'}, 'hash'] +# - [{'rand_seed': '', 'dag_type': 'single', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'no.', 'scale_down_exec': 'default.', 'scale_up_exec': 'no.', 'sche': 'load_least.', 'instance_cache_policy': 'no_evict.', 'filter': '', 'no_mech_latency': '1'}, 'load_least'] +# - [{'rand_seed': '', 'dag_type': 'single', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'no.', 'scale_down_exec': 'default.', 'scale_up_exec': 'no.', 'sche': 'rotate.', 'instance_cache_policy': 'no_evict.', 'filter': '', 'no_mech_latency': '1'}, 'rotate'] +# - [{'rand_seed': '', 'dag_type': 'single', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'hpa.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'hash.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'hpa+hash'] +# - [{'rand_seed': '', 'dag_type': 'single', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'hpa.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'load_least.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'hpa+LoadLeast'] +# - [{'rand_seed': '', 'dag_type': 'single', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'hpa.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'rotate.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'hpa+rotate'] + + +group on x axis: +group: + by: request_freq + types: [low,middle,high,AliTrace] + alias: 'Request Frequency' + type_alias: ['Low','Middle','High', 'AliTrace'] + +# group: +# by: cold_start +# types: [high] +# alias: '' +# type_alias: [''] + +## y axis +values: +# - {alias: Quality-Price Ratio, trans: '10/cost_per_req/time_per_req if cost_per_req>0 and time_per_req>0 else 0'} +- {alias: Quality-Price Ratio, trans: '10 * rps/cost_per_req/time_per_req if cost_per_req>0 and time_per_req>0 else 0'} +- {alias: Cost, trans: 100 * cost_per_req} +- {alias: Latency(ms), trans: 'time_per_req'} # convert 10ms to ms +# - {alias: Cold Start Latency(ms), trans: 'coldstart_time_per_req'} # convert 10ms to ms +- {alias: Throuphput, trans: rps*1000} +# - {alias: Avg Container Count, trans: fn_container_cnt} diff --git a/scripts/fast_draw.yml b/scripts/fast_draw.yml index 95c5c75..081c8d2 100644 --- a/scripts/fast_draw.yml +++ b/scripts/fast_draw.yml @@ -11,22 +11,13 @@ filter: ## each group bars targets_alias: -- [{'rand_seed': '', 'request_freq': 'low', 'dag_type': 'single', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'ensure_scaler.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'ensure_scheduler.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'sd.rflow.dtsingle.cshigh.ftcpu.nml1.mtscale_sche_joint.scl(ensure_scaler.)(default.)(least_task.)[(careful_down.)].scd(ensure_scheduler.).ic(no_evict.)'] -- [{'rand_seed': '', 'request_freq': 'low', 'dag_type': 'single', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'hpa.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'bp_balance.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 
'sd.rflow.dtsingle.cshigh.ftcpu.nml1.mtscale_sche_joint.scl(hpa.)(default.)(least_task.)[(careful_down.)].scd(bp_balance.).ic(no_evict.)'] -- [{'rand_seed': '', 'request_freq': 'low', 'dag_type': 'single', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'hpa.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'pos.greedy', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'sd.rflow.dtsingle.cshigh.ftcpu.nml1.mtscale_sche_joint.scl(hpa.)(default.)(least_task.)[(careful_down.)].scd(pos.greedy).ic(no_evict.)'] -- [{'rand_seed': '', 'request_freq': 'low', 'dag_type': 'single', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'temp_scaler.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'bp_balance.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'sd.rflow.dtsingle.cshigh.ftcpu.nml1.mtscale_sche_joint.scl(temp_scaler.)(default.)(least_task.)[(careful_down.)].scd(bp_balance.).ic(no_evict.)'] -- [{'rand_seed': '', 'request_freq': 'low', 'dag_type': 'single', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'temp_scaler.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'pos.greedy', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'sd.rflow.dtsingle.cshigh.ftcpu.nml1.mtscale_sche_joint.scl(temp_scaler.)(default.)(least_task.)[(careful_down.)].scd(pos.greedy).ic(no_evict.)'] -- [{'rand_seed': '', 'request_freq': 'low', 'dag_type': 'single', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'hpa.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'greedy.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'sd.rflow.dtsingle.cshigh.ftcpu.nml1.mtscale_sche_separated.scl(hpa.)(default.)(least_task.)[(careful_down.)].scd(greedy.).ic(no_evict.)'] -- [{'rand_seed': '', 'request_freq': 'low', 'dag_type': 'single', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'hpa.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'hash.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'sd.rflow.dtsingle.cshigh.ftcpu.nml1.mtscale_sche_separated.scl(hpa.)(default.)(least_task.)[(careful_down.)].scd(hash.).ic(no_evict.)'] -- [{'rand_seed': '', 'request_freq': 'low', 'dag_type': 'single', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'hpa.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'load_least.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'sd.rflow.dtsingle.cshigh.ftcpu.nml1.mtscale_sche_separated.scl(hpa.)(default.)(least_task.)[(careful_down.)].scd(load_least.).ic(no_evict.)'] -- [{'rand_seed': '', 'request_freq': 'low', 'dag_type': 'single', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'hpa.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'pass.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'sd.rflow.dtsingle.cshigh.ftcpu.nml1.mtscale_sche_separated.scl(hpa.)(default.)(least_task.)[(careful_down.)].scd(pass.).ic(no_evict.)'] -- [{'rand_seed': '', 'request_freq': 'low', 'dag_type': 'single', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'hpa.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'rotate.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 
'no_mech_latency': '1'}, 'sd.rflow.dtsingle.cshigh.ftcpu.nml1.mtscale_sche_separated.scl(hpa.)(default.)(least_task.)[(careful_down.)].scd(rotate.).ic(no_evict.)'] -- [{'rand_seed': '', 'request_freq': 'low', 'dag_type': 'single', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'lass.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'load_least.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'sd.rflow.dtsingle.cshigh.ftcpu.nml1.mtscale_sche_separated.scl(lass.)(default.)(least_task.)[(careful_down.)].scd(load_least.).ic(no_evict.)'] -- [{'rand_seed': '', 'request_freq': 'low', 'dag_type': 'single', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'lass.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'pass.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'sd.rflow.dtsingle.cshigh.ftcpu.nml1.mtscale_sche_separated.scl(lass.)(default.)(least_task.)[(careful_down.)].scd(pass.).ic(no_evict.)'] -- [{'rand_seed': '', 'request_freq': 'low', 'dag_type': 'single', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'temp_scaler.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'greedy.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'sd.rflow.dtsingle.cshigh.ftcpu.nml1.mtscale_sche_separated.scl(temp_scaler.)(default.)(least_task.)[(careful_down.)].scd(greedy.).ic(no_evict.)'] -- [{'rand_seed': '', 'request_freq': 'low', 'dag_type': 'single', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'temp_scaler.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'load_least.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'sd.rflow.dtsingle.cshigh.ftcpu.nml1.mtscale_sche_separated.scl(temp_scaler.)(default.)(least_task.)[(careful_down.)].scd(load_least.).ic(no_evict.)'] -- [{'rand_seed': '', 'request_freq': 'low', 'dag_type': 'single', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'temp_scaler.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'pass.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'sd.rflow.dtsingle.cshigh.ftcpu.nml1.mtscale_sche_separated.scl(temp_scaler.)(default.)(least_task.)[(careful_down.)].scd(pass.).ic(no_evict.)'] -- [{'rand_seed': '', 'request_freq': 'low', 'dag_type': 'single', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'temp_scaler.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'rotate.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'sd.rflow.dtsingle.cshigh.ftcpu.nml1.mtscale_sche_separated.scl(temp_scaler.)(default.)(least_task.)[(careful_down.)].scd(rotate.).ic(no_evict.)'] +- [{'rand_seed': '', 'request_freq': 'burst', 'dag_type': 'mix', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'temp_scaler.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'bp_balance.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'Ours'] +- [{'rand_seed': '', 'request_freq': 'burst', 'dag_type': 'mix', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'ensure_scaler.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'ensure_scheduler.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'ENSURE'] +- 
[{'rand_seed': '', 'request_freq': 'burst', 'dag_type': 'mix', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'no.', 'scale_down_exec': 'default.', 'scale_up_exec': 'no.', 'sche': 'faasflow.', 'instance_cache_policy': 'no_evict.', 'filter': '', 'no_mech_latency': '1'}, 'FaasFlow'] +- [{'rand_seed': '', 'request_freq': 'burst', 'dag_type': 'mix', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'lass.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'load_least.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'LaSS+LoadLeast'] +- [{'rand_seed': '', 'request_freq': 'burst', 'dag_type': 'mix', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'hpa.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'load_least.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'HPA+LoadLeast'] +- [{'rand_seed': '', 'request_freq': 'burst', 'dag_type': 'mix', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'hpa.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'pass.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'HPA+PASS'] +- [{'rand_seed': '', 'request_freq': 'burst', 'dag_type': 'mix', 'cold_start': 'high', 'fn_type': 'cpu', 'scale_num': 'Q_learning_hpa.', 'scale_down_exec': 'default.', 'scale_up_exec': 'least_task.', 'sche': 'load_least.', 'instance_cache_policy': 'no_evict.', 'filter': '(careful_down.)', 'no_mech_latency': '1'}, 'RL-scaler+LoadLeast'] ## group on x axis: @@ -38,9 +29,10 @@ group: ## y axis values: -# - {alias: Throughput, trans: throughput} -- {alias: Cost, trans: cost_per_req} -- {alias: Latency(ms), trans: '[waitsche_time_per_req,coldstart_time_per_req,datarecv_time_per_req,exe_time_per_req]'} # convert 10ms to ms -- {alias: Quality-Price Ratio, trans: 'rps/cost_per_req/time_per_req if cost_per_req>0 and time_per_req>0 else 0'} +# - {alias: Quality-Price Ratio, trans: '10/cost_per_req/time_per_req if cost_per_req>0 and time_per_req>0 else 0'} +- {alias: Quality-Price Ratio, trans: '10 * rps/cost_per_req/time_per_req if cost_per_req>0 and time_per_req>0 else 0'} +- {alias: Cost, trans: 100 * cost_per_req} +- {alias: Latency(ms), trans: 'time_per_req'} # convert 10ms to ms +# - {alias: Cold Start Latency(ms), trans: 'coldstart_time_per_req'} # convert 10ms to ms - {alias: Throuphput, trans: rps*1000} -- {alias: Avg Container Count, trans: fn_container_cnt} +# - {alias: Avg Container Count, trans: fn_container_cnt} diff --git a/serverless_sim/.gitignore b/serverless_sim/.gitignore index ae9f3fb..e317794 100644 --- a/serverless_sim/.gitignore +++ b/serverless_sim/.gitignore @@ -4,4 +4,6 @@ records* log module_conf_es.json cache +!src/cache/ +!src/cache/*.rs azure-trace diff --git a/serverless_sim/Cargo.toml b/serverless_sim/Cargo.toml index 7b92e4e..4397c40 100644 --- a/serverless_sim/Cargo.toml +++ b/serverless_sim/Cargo.toml @@ -34,6 +34,9 @@ parking_lot = "0.12" rand_distr = "0.4.3" futures = "0.3" cpu-time = "1.0.0" +sysinfo = "0.29.7" +threadpool = "1.8" + csv = "1.3.1" petgraph = "0.6.5" diff --git a/serverless_sim/module_conf_es.json b/serverless_sim/module_conf_es.json index 6c08067..a6e9547 100644 --- a/serverless_sim/module_conf_es.json +++ b/serverless_sim/module_conf_es.json @@ -6,40 +6,47 @@ }, "scale_num": { "no": null, - "full_placement": null, - "rela": null, - "ensure_scaler": null, + "Q_learning_hpa": null, "hpa": null, "lass": null, - "temp_scaler": 
null + "rela": null, + "temp_scaler": null, + "knee_scaler": null, + "full_placement": null, + "ensure_scaler": null }, "scale_down_exec": { "default": null }, "scale_up_exec": { - "no": null, - "least_task": null + "least_task": null, + "no": null }, "sche": { - "random": null, + "load_least": null, + "faasflow": null, "greedy": null, "ensure_scheduler": null, - "pos": null, + "bcws": null, + "rotate": null, + "pass": null, "consistenthash": null, - "faasflow": null, - "load_least": null, "hash": null, - "pass": null, - "rotate": null, + "pos": null, + "fnsche": null, "bp_balance": null, - "fnsche": null + "random": null }, "filter": { "careful_down": null }, "instance_cache_policy": { - "fifo": null, + "faascache": null, + "scache": null, "no_evict": null, - "lru": null + "flame": null, + "fifo": null, + "lru": null, + "rbuc": null } } \ No newline at end of file diff --git a/serverless_sim/src/cache/faascache.rs b/serverless_sim/src/cache/faascache.rs new file mode 100644 index 0000000..ab1a6c6 --- /dev/null +++ b/serverless_sim/src/cache/faascache.rs @@ -0,0 +1,161 @@ +use std::collections::HashMap; +use std::sync::{Mutex, OnceLock}; + +use crate::fn_dag::FnId; + +use super::InstanceCachePolicy; + +#[derive(Clone, Copy)] +struct FnProfile { + init_cost: f32, + size: f32, +} + +struct CacheEntry { + priority: f32, + last_touch_tick: u64, +} + +fn profile_registry() -> &'static Mutex> { + static REGISTRY: OnceLock>> = OnceLock::new(); + REGISTRY.get_or_init(|| Mutex::new(HashMap::new())) +} + +pub fn register_fn_profile(fnid: FnId, init_cost: f32, size: f32) { + let mut registry = profile_registry().lock().unwrap(); + registry.insert( + fnid, + FnProfile { + init_cost: init_cost.max(1.0), + size: size.max(1.0), + }, + ); +} + +fn fn_profile(fnid: FnId) -> FnProfile { + let registry = profile_registry().lock().unwrap(); + registry.get(&fnid).copied().unwrap_or(FnProfile { + init_cost: 1.0, + size: 1.0, + }) +} + +pub struct FaasCache { + capacity: usize, + clock: f32, + tick: u64, + frequencies: HashMap, + entries: HashMap, +} + +impl FaasCache { + pub fn new(capacity: usize) -> Self { + Self { + capacity, + clock: 0.0, + tick: 0, + frequencies: HashMap::new(), + entries: HashMap::new(), + } + } + + fn next_tick(&mut self) { + self.tick += 1; + } + + fn compute_priority(&self, fnid: FnId, freq: u64) -> f32 { + let profile = fn_profile(fnid); + self.clock + (freq as f32) * profile.init_cost / profile.size + } + + fn touch_entry(&mut self, fnid: FnId) { + let freq = { + let freq = self.frequencies.entry(fnid).and_modify(|v| *v += 1).or_insert(1); + *freq + }; + let priority = self.compute_priority(fnid, freq); + self.entries + .entry(fnid) + .and_modify(|entry| { + entry.priority = priority; + entry.last_touch_tick = self.tick; + }) + .or_insert(CacheEntry { + priority, + last_touch_tick: self.tick, + }); + } + + fn evict_one( + &mut self, + mut can_be_evict: impl FnMut(&FnId) -> bool, + ) -> Option { + let victim = self + .entries + .iter() + .filter(|(fnid, _)| can_be_evict(fnid)) + .min_by(|(fnid_a, entry_a), (fnid_b, entry_b)| { + entry_a + .priority + .partial_cmp(&entry_b.priority) + .unwrap_or(std::cmp::Ordering::Equal) + .then_with(|| entry_a.last_touch_tick.cmp(&entry_b.last_touch_tick)) + .then_with(|| fnid_a.cmp(fnid_b)) + }) + .map(|(fnid, _)| *fnid); + + if let Some(victim) = victim { + let removed = self.entries.remove(&victim).unwrap(); + self.clock = self.clock.max(removed.priority); + self.frequencies.remove(&victim); + return Some(victim); + } + + None + } +} + +impl 
InstanceCachePolicy for FaasCache { + fn get(&mut self, key: FnId) -> Option { + self.next_tick(); + if self.entries.contains_key(&key) { + self.touch_entry(key); + return Some(key); + } + None + } + + fn put( + &mut self, + key: FnId, + mut can_be_evict: Box bool>, + ) -> (Option, bool) { + self.next_tick(); + + if self.entries.contains_key(&key) { + self.touch_entry(key); + return (None, true); + } + + let mut evicted = None; + while self.entries.len() >= self.capacity { + let Some(victim) = self.evict_one(|fnid| can_be_evict(fnid)) else { + return (None, false); + }; + evicted = Some(victim); + } + + self.touch_entry(key); + (evicted, true) + } + + fn remove_all(&mut self, key: &FnId) -> bool { + let removed = self.entries.remove(key).is_some(); + if removed { + self.frequencies.remove(key); + } + removed + } +} + +unsafe impl Send for FaasCache {} diff --git a/serverless_sim/src/cache/flame.rs b/serverless_sim/src/cache/flame.rs new file mode 100644 index 0000000..4efef3c --- /dev/null +++ b/serverless_sim/src/cache/flame.rs @@ -0,0 +1,297 @@ +use std::cmp::Ordering; +use std::collections::{HashMap, HashSet}; +use std::sync::{Mutex, OnceLock}; + +use crate::fn_dag::FnId; + +use super::InstanceCachePolicy; + +#[derive(Clone, Copy, PartialEq, Eq)] +enum CacheSpace { + Protected, + Temporary, +} + +struct CacheEntry { + space: CacheSpace, + hit_count: u64, + last_touch_tick: u64, +} + +struct GlobalHotController { + region_size: f32, + period_ops: u64, + op_in_period: u64, + current_counts: HashMap, + scores: HashMap, + hot_set: HashSet, +} + +impl GlobalHotController { + fn new() -> Self { + Self { + region_size: 0.5, + period_ops: 200, + op_in_period: 0, + current_counts: HashMap::new(), + scores: HashMap::new(), + hot_set: HashSet::new(), + } + } + + fn record_access(&mut self, fnid: FnId) { + self.current_counts + .entry(fnid) + .and_modify(|v| *v += 1) + .or_insert(1); + self.op_in_period += 1; + if self.op_in_period >= self.period_ops { + self.refresh(); + } + } + + fn refresh(&mut self) { + let mut all_fn_ids: HashSet = HashSet::new(); + all_fn_ids.extend(self.scores.keys().copied()); + all_fn_ids.extend(self.current_counts.keys().copied()); + + for fnid in all_fn_ids { + let current = self.current_counts.remove(&fnid).unwrap_or(0) as f32; + let prev = self.scores.get(&fnid).copied().unwrap_or(0.0); + let score = current + 0.5 * prev; + if score > 0.00001 { + self.scores.insert(fnid, score); + } else { + self.scores.remove(&fnid); + } + } + + self.op_in_period = 0; + self.rebuild_hot_set(); + } + + fn rebuild_hot_set(&mut self) { + let mut sorted = self + .scores + .iter() + .map(|(fnid, score)| (*fnid, *score)) + .collect::>(); + sorted.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal)); + + self.hot_set.clear(); + if sorted.is_empty() { + return; + } + + let total = sorted.iter().map(|(_, score)| *score).sum::(); + if total <= 0.00001 { + return; + } + + let mut accum = 0.0; + for (fnid, score) in sorted { + self.hot_set.insert(fnid); + accum += score; + if accum >= self.region_size * total { + break; + } + } + } + + fn is_hot(&self, fnid: FnId) -> bool { + self.hot_set.contains(&fnid) + } + + fn score(&self, fnid: FnId) -> f32 { + self.scores.get(&fnid).copied().unwrap_or(0.0) + } +} + +fn global_controller() -> &'static Mutex { + static CTRL: OnceLock> = OnceLock::new(); + CTRL.get_or_init(|| Mutex::new(GlobalHotController::new())) +} + +fn global_record_access(fnid: FnId) { + let mut ctrl = global_controller().lock().unwrap(); + ctrl.record_access(fnid); +} + 
+pub fn global_is_hot(fnid: FnId) -> bool { + let ctrl = global_controller().lock().unwrap(); + ctrl.is_hot(fnid) +} + +pub fn global_fn_score(fnid: FnId) -> f32 { + let ctrl = global_controller().lock().unwrap(); + ctrl.score(fnid) +} + +pub struct FlameCache { + capacity: usize, + entries: HashMap, + tick: u64, + ttl_ticks: u64, +} + +impl FlameCache { + pub fn new(capacity: usize) -> Self { + Self { + capacity, + entries: HashMap::new(), + tick: 0, + // Flame keeps non-hot functions in temporary space with keep-alive. + // We approximate the keep-alive window with operation ticks. + ttl_ticks: 300, + } + } + + fn next_tick(&mut self) { + self.tick += 1; + } + + fn sync_spaces(&mut self) { + for (fnid, entry) in self.entries.iter_mut() { + entry.space = if global_is_hot(*fnid) { + CacheSpace::Protected + } else { + CacheSpace::Temporary + }; + } + } + + fn is_expired_temporary(&self, entry: &CacheEntry) -> bool { + entry.space == CacheSpace::Temporary + && self.tick.saturating_sub(entry.last_touch_tick) >= self.ttl_ticks + } + + fn evict_temporary( + &mut self, + mut can_be_evict: impl FnMut(&FnId) -> bool, + ) -> Option { + let victim = self + .entries + .iter() + .filter(|(fnid, entry)| { + entry.space == CacheSpace::Temporary && can_be_evict(fnid) + }) + .min_by(|(fnid_a, entry_a), (fnid_b, entry_b)| { + let expired_a = self.is_expired_temporary(entry_a); + let expired_b = self.is_expired_temporary(entry_b); + + (!expired_a) + .cmp(&(!expired_b)) + .then_with(|| entry_a.hit_count.cmp(&entry_b.hit_count)) + .then_with(|| entry_a.last_touch_tick.cmp(&entry_b.last_touch_tick)) + .then_with(|| fnid_a.cmp(fnid_b)) + }) + .map(|(fnid, _)| *fnid); + + if let Some(victim) = victim { + self.entries.remove(&victim); + return Some(victim); + } + None + } + + fn reclaim_protected( + &mut self, + mut can_be_evict: impl FnMut(&FnId) -> bool, + ) -> Option { + let victim = self + .entries + .iter() + .filter(|(fnid, entry)| { + entry.space == CacheSpace::Protected && can_be_evict(fnid) + }) + .min_by(|(fnid_a, entry_a), (fnid_b, entry_b)| { + let age_a = self.tick.saturating_sub(entry_a.last_touch_tick).max(1) as f32; + let age_b = self.tick.saturating_sub(entry_b.last_touch_tick).max(1) as f32; + let reuse_a = (entry_a.hit_count as f32) / age_a; + let reuse_b = (entry_b.hit_count as f32) / age_b; + + reuse_a + .partial_cmp(&reuse_b) + .unwrap_or(Ordering::Equal) + .then_with(|| entry_a.last_touch_tick.cmp(&entry_b.last_touch_tick)) + .then_with(|| fnid_a.cmp(fnid_b)) + }) + .map(|(fnid, _)| *fnid); + + if let Some(victim) = victim { + self.entries.remove(&victim); + return Some(victim); + } + None + } +} + +impl InstanceCachePolicy for FlameCache { + fn get(&mut self, key: FnId) -> Option { + self.next_tick(); + global_record_access(key); + self.sync_spaces(); + + if let Some(entry) = self.entries.get_mut(&key) { + entry.hit_count += 1; + entry.last_touch_tick = self.tick; + return Some(key); + } + None + } + + fn put( + &mut self, + key: FnId, + mut can_be_evict: Box bool>, + ) -> (Option, bool) { + self.next_tick(); + global_record_access(key); + self.sync_spaces(); + + if let Some(entry) = self.entries.get_mut(&key) { + entry.hit_count += 1; + entry.last_touch_tick = self.tick; + return (None, true); + } + + let target_space = if global_is_hot(key) { + CacheSpace::Protected + } else { + CacheSpace::Temporary + }; + + let mut evicted = None; + while self.entries.len() >= self.capacity { + let victim = self.evict_temporary(|fnid| can_be_evict(fnid)).or_else(|| { + if target_space == 
CacheSpace::Protected { + self.reclaim_protected(|fnid| can_be_evict(fnid)) + } else { + None + } + }); + + let Some(victim) = victim else { + return (None, false); + }; + evicted = Some(victim); + } + + self.entries.insert( + key, + CacheEntry { + space: target_space, + hit_count: 1, + last_touch_tick: self.tick, + }, + ); + + (evicted, true) + } + + fn remove_all(&mut self, key: &FnId) -> bool { + self.entries.remove(key).is_some() + } +} + +unsafe impl Send for FlameCache {} diff --git a/serverless_sim/src/cache/mod.rs b/serverless_sim/src/cache/mod.rs index 9820051..547ec10 100644 --- a/serverless_sim/src/cache/mod.rs +++ b/serverless_sim/src/cache/mod.rs @@ -1,6 +1,10 @@ +pub mod faascache; pub mod fifo; +pub mod flame; pub mod lru; pub mod no_evict; +pub mod rbuc; +pub mod scache; use std::{cell::RefCell, cmp::Eq, fmt::Debug, hash::Hash, rc::Rc}; diff --git a/serverless_sim/src/cache/rbuc.rs b/serverless_sim/src/cache/rbuc.rs new file mode 100644 index 0000000..368a650 --- /dev/null +++ b/serverless_sim/src/cache/rbuc.rs @@ -0,0 +1,268 @@ +use std::cmp::Ordering; +use std::collections::{HashMap, HashSet}; +use std::sync::atomic::{AtomicU64, Ordering as AtomicOrdering}; +use std::sync::{Mutex, OnceLock}; + +use crate::fn_dag::FnId; + +use super::InstanceCachePolicy; + +struct CacheEntry { + hit_count: u64, + last_touch_tick: u64, +} + +struct GlobalBudgetController { + alpha: f32, + period_ops: u64, + op_in_period: u64, + budgets: HashMap, + max_replica_seen: HashMap, + cold_inserts: HashMap, + replicas: HashMap>, +} + +impl GlobalBudgetController { + fn new() -> Self { + Self { + alpha: 0.3, + period_ops: 200, + op_in_period: 0, + budgets: HashMap::new(), + max_replica_seen: HashMap::new(), + cold_inserts: HashMap::new(), + replicas: HashMap::new(), + } + } + + fn step_period(&mut self) { + self.op_in_period += 1; + if self.op_in_period >= self.period_ops { + self.refresh(); + } + } + + fn observe_replica_count(&mut self, fnid: FnId) { + let cur = self.replica_count(fnid); + self.max_replica_seen + .entry(fnid) + .and_modify(|v| *v = (*v).max(cur)) + .or_insert(cur); + } + + fn refresh(&mut self) { + let mut fnids = HashSet::new(); + fnids.extend(self.budgets.keys().copied()); + fnids.extend(self.max_replica_seen.keys().copied()); + fnids.extend(self.cold_inserts.keys().copied()); + fnids.extend(self.replicas.keys().copied()); + + for fnid in fnids { + let run_pressure = self.max_replica_seen.remove(&fnid).unwrap_or(0); + let cold_pressure = self.cold_inserts.remove(&fnid).unwrap_or(0); + let demand = run_pressure + cold_pressure; + let prev_budget = self.budgets.get(&fnid).copied().unwrap_or(1) as f32; + let budget = (self.alpha * (demand as f32) + (1.0 - self.alpha) * prev_budget).ceil() + as usize; + self.budgets.insert(fnid, budget.max(1)); + } + + self.op_in_period = 0; + } + + fn record_hit(&mut self, fnid: FnId) { + self.observe_replica_count(fnid); + self.step_period(); + } + + fn record_insert(&mut self, fnid: FnId, node_token: u64) { + let inserted = self.replicas.entry(fnid).or_default().insert(node_token); + if inserted { + self.cold_inserts + .entry(fnid) + .and_modify(|v| *v += 1) + .or_insert(1); + } + self.observe_replica_count(fnid); + self.step_period(); + } + + fn record_remove(&mut self, fnid: FnId, node_token: u64) { + if let Some(nodes) = self.replicas.get_mut(&fnid) { + nodes.remove(&node_token); + if nodes.is_empty() { + self.replicas.remove(&fnid); + } + } + self.observe_replica_count(fnid); + self.step_period(); + } + + fn budget(&self, fnid: FnId) -> usize 
{ + self.budgets.get(&fnid).copied().unwrap_or(1) + } + + fn replica_count(&self, fnid: FnId) -> usize { + self.replicas.get(&fnid).map(|v| v.len()).unwrap_or(0) + } + + fn delta(&self, fnid: FnId) -> isize { + self.budget(fnid) as isize - self.replica_count(fnid) as isize + } +} + +fn global_controller() -> &'static Mutex { + static CTRL: OnceLock> = OnceLock::new(); + CTRL.get_or_init(|| Mutex::new(GlobalBudgetController::new())) +} + +fn next_node_token() -> u64 { + static NEXT: AtomicU64 = AtomicU64::new(1); + NEXT.fetch_add(1, AtomicOrdering::Relaxed) +} + +pub fn global_budget(fnid: FnId) -> usize { + let ctrl = global_controller().lock().unwrap(); + ctrl.budget(fnid) +} + +pub fn global_replica_delta(fnid: FnId) -> isize { + let ctrl = global_controller().lock().unwrap(); + ctrl.delta(fnid) +} + +pub struct RBUCCache { + capacity: usize, + node_token: u64, + entries: HashMap, + tick: u64, + ttl_ticks: u64, +} + +impl RBUCCache { + pub fn new(capacity: usize) -> Self { + Self { + capacity, + node_token: next_node_token(), + entries: HashMap::new(), + tick: 0, + ttl_ticks: 120, + } + } + + fn next_tick(&mut self) { + self.tick += 1; + } + + fn is_expired(&self, entry: &CacheEntry) -> bool { + self.tick.saturating_sub(entry.last_touch_tick) >= self.ttl_ticks + } + + fn remove_local(&mut self, fnid: FnId) -> bool { + if self.entries.remove(&fnid).is_some() { + let mut ctrl = global_controller().lock().unwrap(); + ctrl.record_remove(fnid, self.node_token); + return true; + } + false + } + + fn select_victim( + &self, + mut can_be_evict: impl FnMut(&FnId) -> bool, + surplus_only: bool, + ) -> Option { + let ctrl = global_controller().lock().unwrap(); + self.entries + .iter() + .filter(|(fnid, _)| can_be_evict(fnid)) + .filter(|(fnid, _)| !surplus_only || ctrl.delta(**fnid) < 0) + .min_by(|(fnid_a, entry_a), (fnid_b, entry_b)| { + let delta_a = ctrl.delta(**fnid_a); + let delta_b = ctrl.delta(**fnid_b); + let expired_a = self.is_expired(entry_a); + let expired_b = self.is_expired(entry_b); + + (!expired_a) + .cmp(&(!expired_b)) + .then_with(|| delta_a.cmp(&delta_b)) + .then_with(|| { + let ua = entry_a.hit_count as f32; + let ub = entry_b.hit_count as f32; + ua.partial_cmp(&ub).unwrap_or(Ordering::Equal) + }) + .then_with(|| entry_a.last_touch_tick.cmp(&entry_b.last_touch_tick)) + .then_with(|| fnid_a.cmp(fnid_b)) + }) + .map(|(fnid, _)| *fnid) + } +} + +impl InstanceCachePolicy for RBUCCache { + fn get(&mut self, key: FnId) -> Option { + self.next_tick(); + { + let mut ctrl = global_controller().lock().unwrap(); + ctrl.record_hit(key); + } + + if let Some(entry) = self.entries.get_mut(&key) { + entry.hit_count += 1; + entry.last_touch_tick = self.tick; + return Some(key); + } + + None + } + + fn put( + &mut self, + key: FnId, + mut can_be_evict: Box bool>, + ) -> (Option, bool) { + self.next_tick(); + + if let Some(entry) = self.entries.get_mut(&key) { + { + let mut ctrl = global_controller().lock().unwrap(); + ctrl.record_hit(key); + } + entry.hit_count += 1; + entry.last_touch_tick = self.tick; + return (None, true); + } + + let mut evicted = None; + while self.entries.len() >= self.capacity { + let victim = self + .select_victim(|fnid| can_be_evict(fnid), true) + .or_else(|| self.select_victim(|fnid| can_be_evict(fnid), false)); + let Some(victim) = victim else { + return (None, false); + }; + if self.remove_local(victim) { + evicted = Some(victim); + } else { + return (None, false); + } + } + + self.entries.insert( + key, + CacheEntry { + hit_count: 0, + last_touch_tick: self.tick, + 
}, + ); + let mut ctrl = global_controller().lock().unwrap(); + ctrl.record_insert(key, self.node_token); + + (evicted, true) + } + + fn remove_all(&mut self, key: &FnId) -> bool { + self.remove_local(*key) + } +} + +unsafe impl Send for RBUCCache {} diff --git a/serverless_sim/src/cache/scache.rs b/serverless_sim/src/cache/scache.rs new file mode 100644 index 0000000..5200356 --- /dev/null +++ b/serverless_sim/src/cache/scache.rs @@ -0,0 +1,171 @@ +use std::collections::HashMap; +use std::sync::{Mutex, OnceLock}; + +use crate::fn_dag::FnId; + +use super::InstanceCachePolicy; + +#[derive(Clone, Copy)] +struct FnProfile { + cold_start_time: f32, + size: f32, +} + +struct CacheEntry { + priority: f32, + last_touch_tick: u64, +} + +fn profile_registry() -> &'static Mutex> { + static REGISTRY: OnceLock>> = OnceLock::new(); + REGISTRY.get_or_init(|| Mutex::new(HashMap::new())) +} + +pub fn register_fn_profile(fnid: FnId, cold_start_time: f32, size: f32) { + let mut registry = profile_registry().lock().unwrap(); + registry.insert( + fnid, + FnProfile { + cold_start_time: cold_start_time.max(1.0), + size: size.max(1.0), + }, + ); +} + +fn fn_profile(fnid: FnId) -> FnProfile { + let registry = profile_registry().lock().unwrap(); + registry.get(&fnid).copied().unwrap_or(FnProfile { + cold_start_time: 1.0, + size: 1.0, + }) +} + +pub struct SCache { + capacity: usize, + clock: f32, + tick: u64, + interval_ops: u64, + interval_len: u64, + interval_freq: HashMap, + entries: HashMap, +} + +impl SCache { + pub fn new(capacity: usize) -> Self { + Self { + capacity, + clock: 0.0, + tick: 0, + interval_ops: 0, + interval_len: 200, + interval_freq: HashMap::new(), + entries: HashMap::new(), + } + } + + fn next_tick(&mut self) { + self.tick += 1; + self.interval_ops += 1; + if self.interval_ops >= self.interval_len { + self.interval_ops = 0; + self.interval_freq.clear(); + } + } + + fn compute_priority(&self, fnid: FnId, freq: u64) -> f32 { + let profile = fn_profile(fnid); + self.clock + (freq as f32) * profile.cold_start_time / profile.size + } + + fn touch_entry(&mut self, fnid: FnId) { + let freq = { + let freq = self + .interval_freq + .entry(fnid) + .and_modify(|v| *v += 1) + .or_insert(1); + *freq + }; + let priority = self.compute_priority(fnid, freq); + self.entries + .entry(fnid) + .and_modify(|entry| { + entry.priority = priority; + entry.last_touch_tick = self.tick; + }) + .or_insert(CacheEntry { + priority, + last_touch_tick: self.tick, + }); + } + + fn evict_one(&mut self, mut can_be_evict: impl FnMut(&FnId) -> bool) -> Option { + let victim = self + .entries + .iter() + .filter(|(fnid, _)| can_be_evict(fnid)) + .min_by(|(fnid_a, entry_a), (fnid_b, entry_b)| { + entry_a + .priority + .partial_cmp(&entry_b.priority) + .unwrap_or(std::cmp::Ordering::Equal) + .then_with(|| entry_a.last_touch_tick.cmp(&entry_b.last_touch_tick)) + .then_with(|| fnid_a.cmp(fnid_b)) + }) + .map(|(fnid, _)| *fnid); + + if let Some(victim) = victim { + let removed = self.entries.remove(&victim).unwrap(); + self.clock = self.clock.max(removed.priority); + self.interval_freq.remove(&victim); + return Some(victim); + } + + None + } +} + +impl InstanceCachePolicy for SCache { + fn get(&mut self, key: FnId) -> Option { + self.next_tick(); + if self.entries.contains_key(&key) { + self.touch_entry(key); + return Some(key); + } + None + } + + fn put( + &mut self, + key: FnId, + mut can_be_evict: Box bool>, + ) -> (Option, bool) { + self.next_tick(); + + if self.entries.contains_key(&key) { + self.touch_entry(key); + return 
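Note on the `SCache` policy above: `compute_priority` is a greedy-dual style score, `clock + freq * cold_start_time / size`, and each eviction raises `clock` to the evicted priority so resident entries age relative to newly admitted ones. A small standalone arithmetic check with made-up profile values:

fn priority(clock: f32, freq: u64, cold_start_time: f32, size: f32) -> f32 {
    clock + (freq as f32) * cold_start_time / size
}

fn main() {
    let clock = 0.0;
    // A small, slow-to-start function called 3 times beats a large one called 5 times.
    let a = priority(clock, 3, 8.0, 2.0);  // 12.0
    let b = priority(clock, 5, 4.0, 10.0); // 2.0
    assert!(a > b);
    // After evicting b, clock jumps to b's priority, inflating every later score.
    let clock = b;
    assert!(priority(clock, 1, 1.0, 1.0) > 2.0);
}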
(None, true); + } + + let mut evicted = None; + while self.entries.len() >= self.capacity { + let Some(victim) = self.evict_one(|fnid| can_be_evict(fnid)) else { + return (None, false); + }; + evicted = Some(victim); + } + + self.touch_entry(key); + (evicted, true) + } + + fn remove_all(&mut self, key: &FnId) -> bool { + let removed = self.entries.remove(key).is_some(); + if removed { + self.interval_freq.remove(key); + } + removed + } +} + +unsafe impl Send for SCache {} diff --git a/serverless_sim/src/dag_parsers/csv_parser.rs b/serverless_sim/src/dag_parsers/csv_parser.rs index cf8c66c..f24dba2 100644 --- a/serverless_sim/src/dag_parsers/csv_parser.rs +++ b/serverless_sim/src/dag_parsers/csv_parser.rs @@ -7,7 +7,6 @@ use csv::ReaderBuilder; use crate::sim_env::SimEnv; -use crate::sim_env::{self, SimEnv}; #[derive(Debug)] pub struct TaskInfo { diff --git a/serverless_sim/src/mechanism.rs b/serverless_sim/src/mechanism.rs index 9440306..b4ad00b 100644 --- a/serverless_sim/src/mechanism.rs +++ b/serverless_sim/src/mechanism.rs @@ -1,6 +1,7 @@ -use std::{ cell::{ RefCell, RefMut }, collections::HashMap }; - +use std::{ cell::{ RefCell, RefMut }, collections::HashMap, fs::{File, OpenOptions}, io::Write, path::Path, sync::{Arc, Mutex}, thread, time::{Duration, Instant}}; +use sysinfo::{Pid, PidExt, ProcessExt, System, SystemExt}; use enum_as_inner::EnumAsInner; +use threadpool::ThreadPool; use crate::{ actions::ESActionWrapper, @@ -78,10 +79,11 @@ impl CheckDup for Vec { } } -pub const SCHE_NAMES: [&'static str; 12] = [ +pub const SCHE_NAMES: [&'static str; 13] = [ "rotate", "hash", "bp_balance", + "bcws", "faasflow", "pass", "pos", @@ -94,7 +96,7 @@ pub const SCHE_NAMES: [&'static str; 12] = [ // "load_least", // "random", ]; -pub const SCALE_NUM_NAMES: [&'static str; 7] = [ +pub const SCALE_NUM_NAMES: [&'static str; 9] = [ "no", "hpa", "lass", @@ -102,12 +104,22 @@ pub const SCALE_NUM_NAMES: [&'static str; 7] = [ "full_placement", "rela", "ensure_scaler", + "Q_learning_hpa", + "knee_scaler", ]; pub const SCALE_DOWN_EXEC_NAMES: [&'static str; 1] = ["default"]; pub const SCALE_UP_EXEC_NAMES: [&'static str; 2] = ["least_task", "no"]; pub const MECH_NAMES: [&'static str; 3] = ["no_scale", "scale_sche_separated", "scale_sche_joint"]; pub const FILTER_NAMES: [&'static str; 1] = ["careful_down"]; -pub const INSTANCE_LIVE_NAMES: [&'static str; 3] = ["no_evict", "lru", "fifo"]; +pub const INSTANCE_LIVE_NAMES: [&'static str; 7] = [ + "no_evict", + "lru", + "fifo", + "flame", + "rbuc", + "faascache", + "scache", +]; pub trait Mechanism: Send { fn step( @@ -179,6 +191,7 @@ impl ConfigNewMec for Config { let allow_sche = vec![ "faasflow", "pass", + "bcws", "fnsche", "random", "greedy", @@ -204,8 +217,24 @@ impl ConfigNewMec for Config { } } "scale_sche_separated" => { - let allow_sche = vec!["random", "greedy", "hash", "rotate","load_least","pass"]; - let allow_scale_num = vec!["hpa", "lass", "temp_scaler", "full_placement", "rela"]; + let allow_sche = vec![ + "random", + "greedy", + "hash", + "rotate", + "load_least", + "pass", + "bcws", + ]; + let allow_scale_num = vec![ + "hpa", + "lass", + "temp_scaler", + "full_placement", + "rela", + "Q_learning_hpa", + "knee_scaler", + ]; let allow_scale_down_exec = vec!["default"]; let allow_scale_up_exec = vec!["least_task"]; @@ -223,7 +252,16 @@ impl ConfigNewMec for Config { } "scale_sche_joint" => { let allow_sche = vec!["pos", "bp_balance", "ensure_scheduler"]; - let allow_scale_num = vec!["hpa", "lass", "temp_scaler", "full_placement", "rela", 
"ensure_scaler"]; + let allow_scale_num = vec![ + "hpa", + "lass", + "temp_scaler", + "full_placement", + "rela", + "ensure_scaler", + "Q_learning_hpa", + "knee_scaler", + ]; let allow_scale_down_exec = vec!["default"]; let allow_scale_up_exec = vec!["least_task"]; if @@ -301,6 +339,21 @@ impl SimEnvObserve { pub fn new(core: SimEnvCoreState, help: SimEnvHelperState) -> Self { Self { core, help } } + + /// req_done_avg 平均每个请求处理完的时间 越低越好 + pub fn req_done_time_avg(&self) -> f32 { + if self.core.done_requests().len() == 0 { + return 0.0; + } + + let sum = self.core + .done_requests() + .iter() + .map(|req| (req.end_frame - req.begin_frame) as f32) + .sum::(); + + sum / (self.core.done_requests().len() as f32) + } } impl WithEnvHelp for SimEnvObserve { @@ -314,6 +367,11 @@ impl WithEnvCore for SimEnvObserve { } } +// 4 个线程的线程池 +lazy_static::lazy_static! { + static ref THREAD_POOL: Arc> = Arc::new(Mutex::new(ThreadPool::new(4))); +} + impl Mechanism for MechanismImpl { // 执行步进操作前的准备,根据配置选择调度、扩缩容模式 fn step( @@ -323,6 +381,12 @@ impl Mechanism for MechanismImpl { cmd_distributor: &MechCmdDistributor ) { *self.step_begin.borrow_mut() = util::now_ms(); + let pid = Pid::from_u32(std::process::id()); + let mut sys = System::new(); + sys.refresh_process(pid); + let process = sys.process(pid).unwrap(); + let start_memory = process.memory(); + let start_time = Instant::now(); match &*self.config.mech.mech_type().0 { "no_scale" => self.step_no_scaler(env, self, cmd_distributor, raw_action), "scale_sche_separated" => { @@ -333,6 +397,45 @@ impl Mechanism for MechanismImpl { "scale_sche_joint" => self.step_scale_sche_joint(env, cmd_distributor, raw_action), _ => { panic!("mech_type not supported {}", env.help.config().mech.mech_type().0) } } + let scale_name = env.help().config().mech.scale_num_conf().0.to_string(); + let sche_name = env.help().config().mech.sche_conf().0.to_string(); + let request_freq = env.help().config().request_freq.to_string(); + // 利用线程池的线程来写文件 + THREAD_POOL.lock().unwrap().execute(move || { + let elapsed_time = start_time.elapsed(); + sys.refresh_process(pid); + let process = sys.process(pid).unwrap(); + let memory_use = (process.memory() + start_memory) / 2 / 1024; + // 将该时间写入到外部 txt 文件里,文件路径为:D:\Desktop\paper_publish\experimental_results\algorithm_cost\cpu.txt + let file_path_cpu = format!( + "D:\\Desktop\\paper_publish\\experimental_results\\algorithm_cost\\{}_{}_cpu.txt", + scale_name, sche_name + ); + // 如果文件不存在则创建该文件 + if !Path::new(&file_path_cpu).exists() { + File::create(file_path_cpu.clone()).expect("Failed to create file"); + } + let mut file = OpenOptions::new() + .write(true) + .append(true) + .open(file_path_cpu) + .unwrap(); + writeln!(file, "{}", elapsed_time.as_millis()).expect("Failed to write to file"); + let file_path_mem = format!( + "D:\\Desktop\\paper_publish\\experimental_results\\algorithm_cost\\{}_{}_mem.txt", + scale_name, sche_name + ); + // 如果文件不存在则创建该文件 + if !Path::new(&file_path_mem).exists() { + File::create(file_path_mem.clone()).expect("Failed to create file"); + } + let mut file = OpenOptions::new() + .write(true) + .append(true) + .open(file_path_mem) + .unwrap(); + writeln!(file, "{}", memory_use).expect("Failed to write to file"); + }); } } diff --git a/serverless_sim/src/mechanism_conf.rs b/serverless_sim/src/mechanism_conf.rs index 4073dae..d0a395f 100644 --- a/serverless_sim/src/mechanism_conf.rs +++ b/serverless_sim/src/mechanism_conf.rs @@ -182,6 +182,43 @@ impl MechConfig { let limit = arg.parse::().expect("Please offer fifo cache policy 
arg"); Box::new(crate::cache::lru::LRUCache::new(limit)) } + "flame" => { + // 与现有配置兼容:允许空参数,默认容量 256。 + let limit = if arg.trim().is_empty() { + 256 + } else { + arg.parse::() + .expect("Please offer flame cache policy arg as capacity") + }; + Box::new(crate::cache::flame::FlameCache::new(limit)) + } + "rbuc" => { + let limit = if arg.trim().is_empty() { + 256 + } else { + arg.parse::() + .expect("Please offer rbuc cache policy arg as capacity") + }; + Box::new(crate::cache::rbuc::RBUCCache::new(limit)) + } + "faascache" => { + let limit = if arg.trim().is_empty() { + 256 + } else { + arg.parse::() + .expect("Please offer faascache policy arg as capacity") + }; + Box::new(crate::cache::faascache::FaasCache::new(limit)) + } + "scache" => { + let limit = if arg.trim().is_empty() { + 256 + } else { + arg.parse::() + .expect("Please offer scache policy arg as capacity") + }; + Box::new(crate::cache::scache::SCache::new(limit)) + } "no_evict" => Box::new(crate::cache::no_evict::NoEvict::new()), _ => panic!("new_instance_cache_policy"), } diff --git a/serverless_sim/src/node.rs b/serverless_sim/src/node.rs index f48da46..05021e0 100644 --- a/serverless_sim/src/node.rs +++ b/serverless_sim/src/node.rs @@ -1,4 +1,6 @@ use crate::cache::no_evict::NoEvict; +use crate::cache::faascache::register_fn_profile; +use crate::cache::scache::register_fn_profile as register_scache_profile; use crate::cache::InstanceCachePolicy; use crate::config::Config; use crate::with_env_sub::WithEnvHelp; @@ -256,6 +258,10 @@ impl Node { // log::info!("已经添加了{}", fnid); return; } + let func = env.func(fnid); + register_fn_profile(fnid, func.cold_start_time as f32, func.container_mem()); + register_scache_profile(fnid, func.cold_start_time as f32, func.container_mem()); + drop(func); let (old, flag) = unsafe { let node = NonNull::new_unchecked(self as *const Node as *mut Node); @@ -270,7 +276,7 @@ impl Node { node.container(*to_replace).unwrap().is_idle() }), ); - log::info!("old{:?}", old); + // log::info!("old{:?}", old); (old, flag) }; @@ -305,7 +311,7 @@ impl Node { // but we need to add mem to node in this frame because it's new container *self.mem.borrow_mut() += con_mem_take; } else { - log::info!("内存不够,取消缓存标记{}", fnid); + // log::info!("内存不够,取消缓存标记{}", fnid); let mut node_cache = self.instance_cache_policy.borrow_mut(); assert!(node_cache.remove_all(&fnid)); } diff --git a/serverless_sim/src/request.rs b/serverless_sim/src/request.rs index 4d60bd9..ee2a02d 100644 --- a/serverless_sim/src/request.rs +++ b/serverless_sim/src/request.rs @@ -434,12 +434,15 @@ impl SimEnv { for (dag_i, &(mut avg_frequency, cv)) in env.help.fn_call_frequency().iter() { if env.help.config().request_freq_low() { + // avg_frequency *= 0.4; avg_frequency *= 0.1; } else if env.help.config().request_freq_middle() { + // avg_frequency *= 1.2; avg_frequency *= 0.2; } else { + // avg_frequency *= 1.8; avg_frequency *= 0.3; } // avg_frequency *= 100.0; @@ -460,7 +463,7 @@ impl SimEnv { } } - // log::info!("Gen requests {total_req_cnt} at frame {}", env.current_frame()); + log::info!("Gen requests {total_req_cnt} at frame {}", env.current_frame()); } //let env = self; diff --git a/serverless_sim/src/scale/num/Q_learning_hpa.rs b/serverless_sim/src/scale/num/Q_learning_hpa.rs new file mode 100644 index 0000000..10140ea --- /dev/null +++ b/serverless_sim/src/scale/num/Q_learning_hpa.rs @@ -0,0 +1,157 @@ +// 论文 Reinforcement Learning Applicability for Resource-Based Auto-scaling in Serverless Edge Applications 复现 +// 通过 Q-learning 算法优化 HPA 的 CPU 
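Note on the instrumentation added to `Mechanism::step` above: every step appends one sample per line to `{scale}_{sche}_cpu.txt` (elapsed milliseconds) and `{scale}_{sche}_mem.txt` (the averaged process memory reading) under the hard-coded results directory. A hypothetical post-processing helper, not part of the patch, that averages such a file (the file name below is only an example):

use std::fs;

// Averages a metrics file containing one numeric sample per line,
// as written by the per-step instrumentation in Mechanism::step.
fn average_metric(path: &str) -> Option<f64> {
    let text = fs::read_to_string(path).ok()?;
    let samples: Vec<f64> = text
        .lines()
        .filter_map(|line| line.trim().parse::<f64>().ok())
        .collect();
    if samples.is_empty() {
        return None;
    }
    Some(samples.iter().sum::<f64>() / samples.len() as f64)
}

fn main() {
    // Illustrative path; real files are written to the directory configured above.
    if let Some(avg) = average_metric("hpa_bp_balance_cpu.txt") {
        println!("avg step latency: {avg:.2} ms");
    }
}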
使用率阈值配置,以提高服务质量。 + +use std::collections::HashMap; +use rand::Rng; +use crate::mechanism::SimEnvObserve; +use crate::with_env_sub::WithEnvCore; +use crate::{ actions::ESActionWrapper, fn_dag::FnId }; +use super::ScaleNum; +use super::hpa::{ HpaScaleNum, Target }; + +#[derive(Hash, Eq, PartialEq, Debug, Clone)] +enum Action { + Increase, // 增加 CPU 使用率阈值 + Decrease, // 减少 CPU 使用率阈值 + Maintain, // 保持不变 +} + +#[derive(Hash, Eq, PartialEq, Debug, Clone)] +struct State { + cpu_threshold: i32, // 当前的 CPU 使用率阈值 +} + +// Q-Learning 参数 +pub struct QLearningHpaScaleNum { + q_table: HashMap<(State, Action), f32>, // Q 表 + learning_rate: f32, // 学习率 α + discount_factor: f32, // 折扣因子 γ + exploration_rate: f32, // 探索率 ε + min_cpu_threshold: i32, // CPU 使用率最小阈值 + max_cpu_threshold: i32, // CPU 使用率最大阈值 + target_latency: f32, // 目标延迟 + tolerance: f32, // 容忍度 + hpa_scale_num: HpaScaleNum, +} + +impl QLearningHpaScaleNum { + pub fn new(target_latency: f32) -> Self { + QLearningHpaScaleNum { + q_table: HashMap::new(), + learning_rate: 0.5, + discount_factor: 0.95, + exploration_rate: 1.0, // 初始 ε 值 + min_cpu_threshold: 30, + max_cpu_threshold: 80, + target_latency, + tolerance: 0.1, + hpa_scale_num: HpaScaleNum::new(), + } + } + + /// 选择动作:基于 ε-贪婪策略 + fn choose_action(&self, state: &State) -> Action { + let mut rng = rand::thread_rng(); + if rng.gen_range(0.0..2.0) < self.exploration_rate { + // 随机选择动作 + let actions = self.get_valid_actions(state.cpu_threshold); + actions[rng.gen_range(0..actions.len())].clone() + } else { + // 选择 Q 值最大的动作 + let actions = self.get_valid_actions(state.cpu_threshold); + let mut max_q = f32::MIN; + let mut best_action = Action::Maintain; + + for action in actions { + let q = *self.q_table.get(&(state.clone(), action.clone())).unwrap_or(&0.0); + if q > max_q { + max_q = q; + best_action = action.clone(); + } + } + best_action + } + } + + /// 在合法范围内随机选择动作 + fn get_valid_actions(&self, cpu_threshold: i32) -> Vec { + let mut actions = vec![Action::Maintain]; + if cpu_threshold > self.min_cpu_threshold { + actions.push(Action::Decrease); + } + if cpu_threshold < self.max_cpu_threshold { + actions.push(Action::Increase); + } + actions + } + + /// 更新 Q 表 + fn update_q_table(&mut self, state: &State, action: &Action, reward: f32, next_state: &State) { + let current_q = *self.q_table.get(&(state.clone(), action.clone())).unwrap_or(&0.0); + let max_next_q = self + .get_valid_actions(next_state.cpu_threshold) + .iter() + .map(|a| *self.q_table.get(&(next_state.clone(), a.clone())).unwrap_or(&0.0)) + .fold(f32::MIN, f32::max); + + let new_q = current_q + + self.learning_rate * (reward + self.discount_factor * max_next_q - current_q); + self.q_table.insert((state.clone(), action.clone()), new_q); + } + + /// 奖励函数:基于目标延迟计算奖励 + fn calculate_reward(&self, latency: f32) -> f32 { + if latency < self.target_latency * (1.0 + self.tolerance) { + (self.target_latency / latency) * 10.0 // 延迟越低奖励越高 + } else { + 1.0 // 如果超出目标范围,奖励很低 + } + } +} + +impl ScaleNum for QLearningHpaScaleNum{ + // 获得动作,拿到延迟,计算奖励,更新 Q 表 + fn scale_for_fn( + &mut self, + env: &SimEnvObserve, + fnid: FnId, + _action: &ESActionWrapper + ) -> usize { + // 使用 hpa 进行扩缩容 + let desired_container_cnt = self.hpa_scale_num.scale_for_fn(env, fnid, _action); + + // 先拿到当前 cpu 使用率阈值 + let state = State { + cpu_threshold: (self.hpa_scale_num.get_target() * 100.0) as i32, + }; + // 选择动作 + let action = self.choose_action(&state); + + // 执行动作并更新状态 + let next_cpu_threshold = match action { + Action::Increase => (state.cpu_threshold + 
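Note on the Q-learning scaler above: `update_q_table` applies the standard tabular rule Q(s,a) += alpha * (r + gamma * max_a' Q(s',a') - Q(s,a)), and `calculate_reward` pays more the further latency sits below `target_latency`. A standalone numeric walk-through using the constants from `new()` (alpha = 0.5, gamma = 0.95, tolerance = 0.1) and the low-frequency target latency of 15.0 set in mod.rs, assuming an empty Q table:

fn main() {
    let (alpha, gamma) = (0.5_f32, 0.95_f32);
    let (target_latency, tolerance) = (15.0_f32, 0.1_f32);

    // Same shape as calculate_reward: generous below target*(1+tolerance), else 1.0.
    let reward = |latency: f32| {
        if latency < target_latency * (1.0 + tolerance) {
            (target_latency / latency) * 10.0
        } else {
            1.0
        }
    };
    assert!((reward(12.0) - 12.5).abs() < 1e-6); // below target: scaled reward
    assert!((reward(30.0) - 1.0).abs() < 1e-6);  // above target: flat reward

    // One update step from an empty table: Q = 0 and max next Q = 0.
    let (q, max_next_q) = (0.0_f32, 0.0_f32);
    let new_q = q + alpha * (reward(12.0) + gamma * max_next_q - q);
    assert!((new_q - 6.25).abs() < 1e-6);
}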
1).min(self.max_cpu_threshold), + Action::Decrease => (state.cpu_threshold - 1).max(self.min_cpu_threshold), + Action::Maintain => state.cpu_threshold, + }; + // 获得下一个动作的状态 + let next_state = State { + cpu_threshold: next_cpu_threshold, + }; + // 更新 hpa 算法的 cpu 使用率阈值 + self.hpa_scale_num.set_target(Target::MemUseRate(next_cpu_threshold as f32 / 100.0)); + + log::info!("当前 cpu 利用率阈值为:{},动作为:{:?},下一个状态的 cpu 利用率阈值为: {}", state.cpu_threshold, action, next_state.cpu_threshold); + + // 更新 Q 表 + let avg_latency = env.req_done_time_avg(); + let reward = self.calculate_reward(avg_latency); + self.update_q_table(&state, &action, reward, &next_state); + + // 探索率衰减 + if env.core().current_frame() > 100 && self.exploration_rate > 0.2 { + self.exploration_rate *= 0.9977; + } + + desired_container_cnt + } +} \ No newline at end of file diff --git a/serverless_sim/src/scale/num/ensure_scaler.rs b/serverless_sim/src/scale/num/ensure_scaler.rs index a9eb642..19e297d 100644 --- a/serverless_sim/src/scale/num/ensure_scaler.rs +++ b/serverless_sim/src/scale/num/ensure_scaler.rs @@ -1,11 +1,12 @@ use std::cell::{ RefCell }; -use std::collections::{ HashMap, VecDeque }; +use std::collections::{ HashMap, HashSet, VecDeque }; use crate::fn_dag::EnvFnExt; use crate::mechanism::SimEnvObserve; use crate::node::EnvNodeExt; -use crate::with_env_sub::{ WithEnvCore }; +use crate::sim_run::schedule_helper; +use crate::with_env_sub::{ WithEnvCore, WithEnvHelp }; use crate::{ actions::ESActionWrapper, fn_dag::FnId, CONTAINER_BASIC_MEM }; use super::{ down_filter::{ CarefulScaleDownFilter, ScaleFilter }, ScaleNum }; @@ -22,18 +23,56 @@ impl EnsureScaleNum { impl ScaleNum for EnsureScaleNum { fn scale_for_fn(&mut self, env: &SimEnvObserve, fnid: FnId, _action: &ESActionWrapper) -> usize { - + + let mut need_to_schedule = false; + // // 找到这一帧需要调度的函数 + // for (_req_id, req) in env.core().requests_mut().iter_mut() { + // let schedule_able_fns = schedule_helper::collect_task_to_sche( + // req, + // env, + // schedule_helper::CollectTaskConfig::All, + // ); + // for sche_fnid in schedule_able_fns.iter() { + // if sche_fnid == &fnid { + // need_to_schedule = true; + // } + // } + // } + let requests = env.core().requests(); + let current_frame = env.core().current_frame(); + for (_, req) in requests.iter().filter(|(_, req)| req.begin_frame == current_frame) { + // 拿到该请求对应的DAG + let mut walker = env.dag(req.dag_i).new_dag_walker(); + // 遍历DAG里面的所有图节点 + while let Some(fngid) = walker.next(&env.dag(req.dag_i).dag_inner) { + // 得到该图节点对应的函数 + let fnid_in_dag = env.dag_inner(req.dag_i)[fngid]; + // 累加当前函数到达的次数 + if fnid_in_dag == fnid { + need_to_schedule = true; + break; + } + } + } + + let current_frame = env.core().current_frame(); + // 当前容器数量 let cur_container_cnt = env.fn_container_cnt(fnid); // 取cur_container_cnt的根号 let sqrt_container_cnt = (cur_container_cnt as f64).sqrt().ceil() as usize; - if cur_container_cnt + sqrt_container_cnt == 0 { - 1 - } - else { - cur_container_cnt + sqrt_container_cnt + if need_to_schedule || cur_container_cnt == 0 { + + if cur_container_cnt + sqrt_container_cnt == 0{ + 1 + } + else { + cur_container_cnt + sqrt_container_cnt + } + }else { + cur_container_cnt } } diff --git a/serverless_sim/src/scale/num/hpa.rs b/serverless_sim/src/scale/num/hpa.rs index 6c0f7b5..e3c5524 100644 --- a/serverless_sim/src/scale/num/hpa.rs +++ b/serverless_sim/src/scale/num/hpa.rs @@ -35,6 +35,13 @@ impl HpaScaleNum { pub fn set_target(&mut self, tar: Target) { self.target = tar; } + // TODO:检查是否正确获取 + pub fn 
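Note on the `EnsureScaleNum` change above: scaling only happens when the function appears in the DAG of a request arriving this frame or it currently has no containers; the target is then `cur + ceil(sqrt(cur))` with a floor of one container, otherwise the count is held. A quick standalone check of that arithmetic:

fn ensure_target(cur: usize, need_to_schedule: bool) -> usize {
    let grow = (cur as f64).sqrt().ceil() as usize;
    if need_to_schedule || cur == 0 {
        if cur + grow == 0 { 1 } else { cur + grow }
    } else {
        cur
    }
}

fn main() {
    assert_eq!(ensure_target(0, false), 1); // cold function still gets one container
    assert_eq!(ensure_target(4, true), 6);  // 4 + ceil(sqrt(4))
    assert_eq!(ensure_target(5, true), 8);  // 5 + ceil(sqrt(5)) = 5 + 3
    assert_eq!(ensure_target(5, false), 5); // nothing arriving: hold steady
}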
get_target(&self) -> f32{ + let target = match self.target { + Target::MemUseRate(rate) => rate, + }; + target + } } impl ScaleNum for HpaScaleNum { diff --git a/serverless_sim/src/scale/num/knee_scaler.rs b/serverless_sim/src/scale/num/knee_scaler.rs new file mode 100644 index 0000000..c85406f --- /dev/null +++ b/serverless_sim/src/scale/num/knee_scaler.rs @@ -0,0 +1,214 @@ +use std::collections::{BTreeMap, HashMap}; + +use crate::fn_dag::{EnvFnExt, FnId}; +use crate::mechanism::SimEnvObserve; +use crate::node::EnvNodeExt; +use crate::with_env_sub::{WithEnvCore, WithEnvHelp}; +use crate::actions::ESActionWrapper; + +use super::ScaleNum; + +#[derive(Default)] +struct FnKneeState { + throughput_by_replica: BTreeMap, + knee_replicas: Option, + idle_frames: usize, + last_scale_frame: usize, +} + +#[derive(Default)] +struct FnMetrics { + demand: f32, + throughput: f32, + avg_cpu_util: f32, + avg_mem_util: f32, +} + +pub struct KneeScaleNum { + fn_states: HashMap, + ema_alpha: f32, + util_up_threshold: f32, + util_down_threshold: f32, + cooldown_frames: usize, + idle_to_zero_frames: usize, +} + +impl KneeScaleNum { + pub fn new() -> Self { + Self { + fn_states: HashMap::new(), + ema_alpha: 0.35, + util_up_threshold: 0.72, + util_down_threshold: 0.38, + cooldown_frames: 5, + idle_to_zero_frames: 30, + } + } + + fn observe_fn_metrics(env: &SimEnvObserve, fnid: FnId) -> FnMetrics { + let mech_metric = env.help().mech_metric(); + let unsche = mech_metric.fn_unsche_req_cnt(fnid) as f32; + let ready = mech_metric + .fn_ready_sche_tasks(fnid) + .map(|s| s.len() as f32) + .unwrap_or(0.0); + drop(mech_metric); + + let mut throughput = 0.0; + let mut avg_cpu_util = 0.0; + let mut avg_mem_util = 0.0; + let mut cnt = 0; + + env.fn_containers_for_each(fnid, |container| { + throughput += container.recent_handle_speed(); + avg_cpu_util += container.cpu_use_rate(); + + let node = env.node(container.node_id); + if node.rsc_limit.mem > 0.00001 { + avg_mem_util += container.last_frame_mem / node.rsc_limit.mem; + } + cnt += 1; + }); + + if cnt > 0 { + let cntf = cnt as f32; + avg_cpu_util /= cntf; + avg_mem_util /= cntf; + } + + FnMetrics { + demand: unsche.max(ready), + throughput, + avg_cpu_util, + avg_mem_util, + } + } + + fn detect_knee_replicas(samples: &BTreeMap) -> Option { + if samples.len() < 3 { + return None; + } + + let mut points: Vec<(usize, f32)> = samples + .iter() + .map(|(x, y)| (*x, (*y).max(0.0))) + .collect(); + + // Make throughput monotonic to reduce oscillation noise. + for i in 1..points.len() { + if points[i].1 < points[i - 1].1 { + points[i].1 = points[i - 1].1; + } + } + + let x_min = points.first().unwrap().0 as f32; + let x_max = points.last().unwrap().0 as f32; + let y_min = points + .iter() + .map(|(_, y)| *y) + .fold(f32::MAX, |a, b| a.min(b)); + let y_max = points + .iter() + .map(|(_, y)| *y) + .fold(f32::MIN, |a, b| a.max(b)); + + if (x_max - x_min) < 0.00001 || (y_max - y_min) < 0.00001 { + return None; + } + + // Kneedle-style score on normalized curve: max(y_norm - x_norm). 
+ let mut best_x = points[0].0; + let mut best_score = f32::MIN; + for (x, y) in points.iter().skip(1).take(points.len().saturating_sub(2)) { + let x_norm = ((*x as f32) - x_min) / (x_max - x_min); + let y_norm = (*y - y_min) / (y_max - y_min); + let score = y_norm - x_norm; + if score > best_score { + best_score = score; + best_x = *x; + } + } + + Some(best_x) + } +} + +impl ScaleNum for KneeScaleNum { + fn scale_for_fn(&mut self, env: &SimEnvObserve, fnid: FnId, _action: &ESActionWrapper) -> usize { + let current_frame = env.core().current_frame(); + let cur_container_cnt = env.fn_container_cnt(fnid); + let metrics = Self::observe_fn_metrics(env, fnid); + + let state = self.fn_states.entry(fnid).or_default(); + + if cur_container_cnt > 0 { + let old = state + .throughput_by_replica + .get(&cur_container_cnt) + .copied() + .unwrap_or(metrics.throughput); + let new_val = old * (1.0 - self.ema_alpha) + metrics.throughput * self.ema_alpha; + state.throughput_by_replica.insert(cur_container_cnt, new_val); + state.knee_replicas = Self::detect_knee_replicas(&state.throughput_by_replica); + } + + let has_work = metrics.demand > 0.0; + if !has_work && metrics.throughput < 0.01 { + state.idle_frames += 1; + } else { + state.idle_frames = 0; + } + + if cur_container_cnt == 0 { + return if has_work { 1 } else { 0 }; + } + + if state.idle_frames >= self.idle_to_zero_frames { + state.last_scale_frame = current_frame; + return 0; + } + + let pressure = metrics.avg_cpu_util.max(metrics.avg_mem_util); + let knee_replicas = state.knee_replicas.unwrap_or(cur_container_cnt.max(1)); + + let in_cooldown = current_frame.saturating_sub(state.last_scale_frame) < self.cooldown_frames; + let severe_overload = + has_work && (pressure > 0.9 || metrics.demand > metrics.throughput * 1.8); + if in_cooldown && !severe_overload { + return cur_container_cnt; + } + + let mut desired = cur_container_cnt; + + // Scale up when pressure/backlog is high. + if has_work && (pressure > self.util_up_threshold || metrics.demand > metrics.throughput * 1.2) { + if cur_container_cnt < knee_replicas { + desired = cur_container_cnt + 1; + } else if metrics.demand > metrics.throughput * 1.5 { + // Allow crossing knee under sustained overload. + desired = cur_container_cnt + 1; + } + } + + // Scale down conservatively when over-knee and lightly loaded. 
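Note on `detect_knee_replicas` above: after the monotonic smoothing, both axes are normalized to [0, 1] and the knee is the interior replica count that maximizes `y_norm - x_norm` (a Kneedle-style difference). A standalone re-computation of that score on toy throughput numbers; the real code additionally returns None for flat or degenerate curves:

use std::collections::BTreeMap;

// Normalized-difference knee score on an already monotonic throughput-by-replica curve.
fn knee(samples: &BTreeMap<usize, f32>) -> Option<usize> {
    if samples.len() < 3 {
        return None;
    }
    let points: Vec<(usize, f32)> = samples.iter().map(|(x, y)| (*x, *y)).collect();
    let (x_min, x_max) = (points.first()?.0 as f32, points.last()?.0 as f32);
    let y_min = points.iter().map(|p| p.1).fold(f32::MAX, f32::min);
    let y_max = points.iter().map(|p| p.1).fold(f32::MIN, f32::max);
    let score = |p: &(usize, f32)| {
        (p.1 - y_min) / (y_max - y_min) - (p.0 as f32 - x_min) / (x_max - x_min)
    };
    points
        .iter()
        .skip(1)
        .take(points.len() - 2) // interior points only
        .max_by(|a, b| score(*a).total_cmp(&score(*b)))
        .map(|p| p.0)
}

fn main() {
    // Throughput flattens after 3 replicas, so 3 is the knee.
    let curve: BTreeMap<usize, f32> = [(1, 10.0), (2, 19.0), (3, 26.0), (4, 27.0), (5, 27.5)]
        .into_iter()
        .collect();
    assert_eq!(knee(&curve), Some(3));
}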
+ if desired == cur_container_cnt && + cur_container_cnt > 1 && + cur_container_cnt > knee_replicas && + pressure < self.util_down_threshold && + metrics.demand <= metrics.throughput + { + desired = cur_container_cnt - 1; + } + + if has_work && desired == 0 { + desired = 1; + } + + if desired != cur_container_cnt { + state.last_scale_frame = current_frame; + } + + desired + } +} + diff --git a/serverless_sim/src/scale/num/mod.rs b/serverless_sim/src/scale/num/mod.rs index dd8b760..a34cba9 100644 --- a/serverless_sim/src/scale/num/mod.rs +++ b/serverless_sim/src/scale/num/mod.rs @@ -7,6 +7,8 @@ pub mod temp_scaler; pub mod full_placement; pub mod rela; pub mod ensure_scaler; +pub mod Q_learning_hpa; +pub mod knee_scaler; use crate::{ actions::ESActionWrapper, @@ -24,6 +26,8 @@ use self::{ full_placement::FpScaleNum, rela::RelaScaleNum, ensure_scaler::EnsureScaleNum, + Q_learning_hpa::QLearningHpaScaleNum, + knee_scaler::KneeScaleNum, }; pub trait ScaleNum: Send { @@ -63,6 +67,19 @@ pub fn new_scale_num(c: &Config) -> Option> { "ensure_scaler" => { return Some(Box::new(EnsureScaleNum::new())); } + "Q_learning_hpa" => { + let mut target_latency = 15.0; + if c.request_freq_middle() { + target_latency = 23.0; + } + else if c.request_freq_high() { + target_latency = 31.0; + } + return Some(Box::new(QLearningHpaScaleNum::new(target_latency))); + } + "knee_scaler" => { + return Some(Box::new(KneeScaleNum::new())); + } _ => { return None; } diff --git a/serverless_sim/src/scale/num/temp_scaler.rs b/serverless_sim/src/scale/num/temp_scaler.rs index 453071e..f825c59 100644 --- a/serverless_sim/src/scale/num/temp_scaler.rs +++ b/serverless_sim/src/scale/num/temp_scaler.rs @@ -8,6 +8,7 @@ use std::collections::{ HashMap, VecDeque }; use crate::fn_dag::EnvFnExt; use crate::mechanism::SimEnvObserve; use crate::node::EnvNodeExt; +use crate::sim_run::schedule_helper; use crate::with_env_sub::{ WithEnvCore }; use crate::{ actions::ESActionWrapper, fn_dag::FnId, CONTAINER_BASIC_MEM }; @@ -199,6 +200,20 @@ impl ScaleNum for TempScaleNum { let requests = env.core().requests(); // 遍历所有请求,只看当前帧到达的请求 + // for (_, req) in requests.iter(){ + // // 收集该请求中所有可以执行的函数 + // let schedule_able_fns = schedule_helper::collect_task_to_sche( + // req, + // env, + // schedule_helper::CollectTaskConfig::PreAllDone, + // ); + + // for fnid_in_req in schedule_able_fns { + // if fnid_in_req == fnid { + // fn_count += 1; + // } + // } + // } for (_, req) in requests.iter().filter(|(_, req)| req.begin_frame == current_frame) { // 拿到该请求对应的DAG let mut walker = env.dag(req.dag_i).new_dag_walker(); @@ -293,7 +308,8 @@ impl ScaleNum for TempScaleNum { if temp_change.abs() > threshold { // MARK 该增率的计算方式与论文中所写的不一致,后续有时间应该进一步实验测试对比一下现计算方式和论文中所写计算方式的优劣 // 计算容器数量的增率 - let container_inc_rate = temp_change.abs() / threshold; + // let container_inc_rate = temp_change.abs() / threshold; + let container_inc_rate = temp_change.abs() / temp_history_mean; // 统计目前已有的函数实例数量 let mut fn_instance_cnt = 0; @@ -322,10 +338,11 @@ impl ScaleNum for TempScaleNum { if temp_change > 0.0 { // MARK 该增量的计算方式与论文中所写的不一致,后续有时间应该进一步实验测试对比一下现计算方式和论文中所写计算方式的优劣 // 根据温度增量计算容器数量的增量 - let container_change = ( - (fn_instance_cnt as f64) * - (container_inc_rate - 1.0) - ).ceil() as i32; + // let container_change = ( + // (fn_instance_cnt as f64) * + // (container_inc_rate - 1.0) + // ).ceil() as i32; + let container_change = ((fn_instance_cnt as f64) * container_inc_rate).ceil() as i32; // 如果所需要的实例数量大于空闲的实例数量,则进行扩容 if container_change >= idle_fn_instance_cnt { @@ -362,7 +379,8 
@@ impl ScaleNum for TempScaleNum { // 设置机制来处理 温度感知器没反应,但是函数在持续缓慢升温/降温的情况----------------------------------------------------- // 获取当前函数的所有容器,计算平均cpu、mem利用率 - if !scale_sign && cur_container_cnt != 0 { + // MARK 修改一处,增加 && fn_count > 0 + if !scale_sign && cur_container_cnt != 0 && fn_count > 0{ let mut container_avg_cpu_util = 0.0; let mut container_avg_mem_util = 0.0; @@ -397,20 +415,432 @@ impl ScaleNum for TempScaleNum { } // 对于容器数量为0的函数,如果最后一次调用距离现在的长度小于历史调用窗口长度,则变为一个容器 - if desired_container_cnt == 0 && last_call_frame + self.call_history_window_len >= current_frame { + if fn_count > 0 && desired_container_cnt == 0 { desired_container_cnt = 1; } // 对于容器数量是1的函数,如果最后一次调用距离现在的长度大于历史调用窗口长度,则缩容为0个容器 - else if desired_container_cnt == 1 && last_call_frame + self.call_history_window_len < current_frame { + else if desired_container_cnt == 1 && last_call_frame + self.call_history_window_len < 20 { + assert!(fn_count == 0); desired_container_cnt = 0; } - // log::info!("函数:{}, 在第{}帧的目标容器数量为:{}.scale_for_fn()结束", fnid, current_frame, desired_container_cnt); - - // log::info!("扩缩容器决策升温 {} 次", self.decide_to_up_count); - // log::info!("扩缩容器决策降温 {} 次", self.decide_to_down_count); - // log::info!("mem决策升温 {} 次", self.mem_decide_to_up_count); desired_container_cnt } } +// /* +// 算法流程:https://fvd360f8oos.feishu.cn/docx/QbXqdmszVo4lOsxvveacdDOMn6c?from=from_copylink +// */ + +// use std::cell::{ RefCell }; +// use std::collections::{ HashMap, VecDeque }; + +// use crate::fn_dag::EnvFnExt; +// use crate::mechanism::SimEnvObserve; +// use crate::node::EnvNodeExt; +// use crate::with_env_sub::{ WithEnvCore }; +// use crate::{ actions::ESActionWrapper, fn_dag::FnId, CONTAINER_BASIC_MEM }; + +// use super::{ down_filter::{ CarefulScaleDownFilter, ScaleFilter }, ScaleNum }; + +// // 定义Hawkes过程参数类型 +// struct HawkesParams { +// mu: f64, // 在无历史调用影响下的平均调用率 +// alpha: f64, // 单个触发事件的影响力 +// beta: f64, // 衰减率,表示过去调用对当前调用率影响的衰减速度 +// } +// impl HawkesParams { +// fn new() -> HawkesParams { +// // MARK 以下三个参数初始值可以更改 +// HawkesParams { +// mu: 0.1, +// alpha: 0.2, +// beta: 1.25, +// } +// } +// } + +// struct FrameCountTemp { +// frame: usize, +// count: usize, +// temp: f64, +// } +// impl FrameCountTemp { +// fn new(frame: usize, count: usize, temp: f64) -> FrameCountTemp { +// FrameCountTemp { frame, count, temp } +// } +// } + +// // 函数调用温度感知调度器 +// pub struct TempScaleNum { +// // 指定函数的 Hawkes 过程的的相关参数 +// fn_params: HashMap>, + +// // 函数的历史调用记录,函数 - 帧数_温度 的映射,用于计算温度 +// fn_call_history: HashMap>>, + +// // 函数的历史温度记录,函数 - 帧数_温度 的映射,帧数连续,只用于计算阈值和判断扩缩容 +// fn_temp_history: HashMap>>, + +// // 函数根据温度决定扩缩容的帧数记录 +// fn_temp_scale_sign: HashMap, + +// // 历史 调用_温度 记录窗口长度 +// call_history_window_len: usize, + +// // 函数历史温度记录的窗口长度 +// temp_history_window_len: usize, + +// // 温度感知窗口长度 +// temp_care_window_len: usize, + +// // 控制缩容时候的容器过滤策略,目前用的是 CarefulScaleDownFilter +// pub scale_down_policy: Box, + +// // 记录扩缩容器决策扩容、缩容次数 +// decide_to_up_count: usize, +// resource_decide_to_up_count: usize, +// decide_to_down_count: usize, +// } + +// impl TempScaleNum { +// pub fn new() -> Self { +// // log::info!("创建了一个 TempScaleNum 实例"); + +// Self { +// fn_params: HashMap::new(), +// fn_call_history: HashMap::new(), +// fn_temp_history: HashMap::new(), +// fn_temp_scale_sign: HashMap::new(), + +// call_history_window_len: 50, +// temp_history_window_len: 50, +// temp_care_window_len: 10, + +// scale_down_policy: Box::new(CarefulScaleDownFilter::new()), + +// decide_to_up_count: 0, +// resource_decide_to_up_count: 0, +// 
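Note on the revised temperature-scaling arithmetic above: the increase rate is now `|temp_change| / temp_history_mean` instead of `|temp_change| / threshold`, and the instance demand is `ceil(fn_instance_cnt * rate)` rather than the old `rate - 1` form (kept commented out); that demand is then compared against the idle instance capacity to decide whether one more container is added. A quick standalone check with made-up temperatures:

fn instance_demand(fn_instance_cnt: i32, temp_change: f64, temp_history_mean: f64) -> i32 {
    let container_inc_rate = temp_change.abs() / temp_history_mean;
    ((fn_instance_cnt as f64) * container_inc_rate).ceil() as i32
}

fn main() {
    // Historical mean temperature 2.0, recent window 2.5 => rate 0.25.
    // With 8 instances running, the computed demand is ceil(8 * 0.25) = 2,
    // which is then compared against idle capacity before scaling up.
    assert_eq!(instance_demand(8, 0.5, 2.0), 2);
    // A larger temperature jump scales the demand proportionally.
    assert_eq!(instance_demand(8, 1.5, 2.0), 6);
}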
decide_to_down_count: 0, +// } +// } + +// // 计算指定帧数下,指定函数的温度值 +// fn compute_fn_temperature(&self, fnid: FnId, calculate_frame: usize, fn_count: usize) -> f64 { +// // 取出参数 +// let alpha = self.fn_params.get(&fnid).unwrap().borrow().alpha; +// let beta = self.fn_params.get(&fnid).unwrap().borrow().beta; +// let mu = self.fn_params.get(&fnid).unwrap().borrow().mu; + +// // 温度初始化 +// let mut temp = mu; + +// // 取出函数的调用记录 +// if let Some(call_records) = self.fn_call_history.get(&fnid) { +// // 根据 Hawkes 公式计算温度 +// for frame_count_temp in call_records.borrow().iter() { +// // 只能计算指定帧数以前的调用记录 +// if calculate_frame < frame_count_temp.frame { +// break; +// } + +// temp += +// (frame_count_temp.count as f64) * +// alpha * +// (-beta * ((calculate_frame - frame_count_temp.frame) as f64)).exp(); +// } +// } + +// // 当前帧的调用还没有被记录,所以另外计算 +// temp += (fn_count as f64) * alpha * 1.0; + +// // 取温度的对数 +// temp.ln() +// } + +// // 计算指定函数的历史温度平均值以及温度变化感知阈值(历史温度的标准差) +// fn compute_fn_temp_trans_threshold(&self, fnid: FnId) -> (f64, f64) { +// if let Some(history_ref) = self.fn_temp_history.get(&fnid) { +// // 取出温度历史记录的不可变借用 +// let history = history_ref.borrow(); + +// // 取出所有温度值 +// let mut samples: VecDeque = VecDeque::new(); +// for frame_count_temp in history.iter() { +// samples.push_back(frame_count_temp.temp); +// } + +// // 释放RefCell的borrow +// drop(history); + +// // MARK 最近的感知窗口长度的帧不计算在内 +// for _i in 0..self.temp_care_window_len { +// samples.pop_back(); +// } + +// // 求平均数 +// let mean = samples.iter().sum::() / (samples.len() as f64); + +// // 求方差 +// let variance = +// samples +// .iter() +// .map(|&x| (x - mean).powi(2)) +// .sum::() / (samples.len() as f64); + +// // 求标准差 +// let std_dev = variance.sqrt(); + +// // 将标准差作为临界值,可以根据实验情况更改 +// (mean, std_dev) +// } else { +// // 返回合适的默认值或者处理方式 +// (0.0, f64::MAX) +// } +// } +// } + +// // 实现核心 trait +// impl ScaleNum for TempScaleNum { +// // 设置指定函数的目标容器数量 +// fn scale_for_fn( +// &mut self, +// env: &SimEnvObserve, +// fnid: FnId, +// _action: &ESActionWrapper +// ) -> usize { +// // 初始化====================================================================================== +// // 获得当前帧数 +// let current_frame = env.core().current_frame(); + +// // 如果该函数是第一次进行扩缩容操作,则初始化参数、调用记录、历史记录 +// self.fn_params.entry(fnid).or_insert_with(|| RefCell::new(HawkesParams::new())); +// self.fn_call_history.entry(fnid).or_insert_with(|| RefCell::new(VecDeque::new())); +// self.fn_temp_history.entry(fnid).or_insert_with(|| RefCell::new(VecDeque::new())); +// self.fn_temp_scale_sign.entry(fnid).or_insert_with(|| 0); + +// // ============================================================================================ + +// // 更新函数的历史调用记录------------------------------------------------------------ +// // 首先统计当前函数在这一帧的到达数量 +// let mut fn_count = 0; + +// // 取出所有的请求的不可变借用 +// let requests = env.core().requests(); + +// // 遍历所有请求,只看当前帧到达的请求 +// for (_, req) in requests.iter().filter(|(_, req)| req.begin_frame == current_frame) { +// // 拿到该请求对应的DAG +// let mut walker = env.dag(req.dag_i).new_dag_walker(); +// // 遍历DAG里面的所有图节点 +// while let Some(fngid) = walker.next(&env.dag(req.dag_i).dag_inner) { +// // 得到该图节点对应的函数 +// let fnid_in_dag = env.dag_inner(req.dag_i)[fngid]; +// // 累加当前函数到达的次数 +// if fnid_in_dag == fnid { +// fn_count += 1; +// } +// } +// } + +// let current_temp = self.compute_fn_temperature(fnid, current_frame, fn_count); +// // 只有fn_count > 0 才更新调用记录 +// if fn_count > 0 { +// // 获取当前帧数的调用温度,此时当前帧的调用记录还没有被记录,所以需要额外传输一个参数 + +// // 更新该函数的历史调用记录 +// let mut 
call_history = self.fn_call_history.get(&fnid).unwrap().borrow_mut(); +// call_history.push_back(FrameCountTemp::new(current_frame, fn_count, current_temp)); + +// // 控制滑动窗口长度 +// if call_history.len() > self.call_history_window_len { +// call_history.pop_front(); +// } +// } +// // ---------------------------------------------------------------------------------------- + +// // MARK 为了避免借用冲突,必须放在更新历史温度记录前面 +// // 标记温度策略是否决定了扩缩容 +// let mut scale_sign = false; + +// // 当前容器数量 +// let cur_container_cnt = env.fn_container_cnt(fnid); + +// // 至少要20帧后才用温度计算扩缩容,不然样本数不够 +// let temp_history_min_len = 20; + +// // 如果记录表长度小于 10,则不进行温度决策,也不需要计算阈值, +// let mut threshold = f64::MAX; +// let mut temp_history_mean = 0.0; +// if self.fn_temp_history.get(&fnid).unwrap().borrow().len() >= temp_history_min_len { +// // 计算以前的温度的正常波动情况 +// (temp_history_mean, threshold) = self.compute_fn_temp_trans_threshold(fnid); +// } + +// // 更新函数的历史温度记录--------------------------------------------------------------- +// // 拿到历史温度记录的可变借用 +// let mut temp_recent = self.fn_temp_history.get(&fnid).unwrap().borrow_mut(); + +// // 插入到函数的历史温度记录 +// temp_recent.push_back(FrameCountTemp::new(current_frame, fn_count, current_temp)); + +// // 控制滑动窗口长度 +// if temp_recent.len() > self.temp_history_window_len { +// temp_recent.pop_front(); +// } +// // ---------------------------------------------------------------------------------------- + +// // TODO 根据历史温度记录表来决定是否扩缩容以及计算目标容器数量---------------------------------- +// // 初始化目标容器数量 +// let mut desired_container_cnt = cur_container_cnt; + +// // 如果记录表长度小于 10,或者最近10帧内使用过温度进行扩缩容,则不进行温度决策 +// if +// temp_recent.len() >= temp_history_min_len && +// current_frame - self.fn_temp_scale_sign.get(&fnid).unwrap() > self.temp_care_window_len +// { +// // 新建一个扩缩容关心温度变化记录表 +// let mut temp_care_records: VecDeque = VecDeque::new(); + +// // 从队尾往队头遍历(队尾的记录是最新的)插入temp_care_records +// for frame_count_temp in temp_recent.iter().rev() { +// temp_care_records.push_front(frame_count_temp.temp); + +// // 控制记录表长度为窗口长度 +// if temp_care_records.len() == self.temp_care_window_len { +// break; +// } +// } + +// // 得到扩缩容关心温度变化记录表的平均值 +// let temp_care_mean = +// temp_care_records.iter().sum::() / (temp_care_records.len() as f64); + +// // 计算温度增量 +// let temp_change = temp_care_mean - temp_history_mean; + +// // 如果温度增量的绝对值大于温度变化感知阈值,则进行扩缩容决策 +// if temp_change.abs() > threshold { +// // MARK 该增率的计算方式与论文中所写的不一致,后续有时间应该进一步实验测试对比一下现计算方式和论文中所写计算方式的优劣 +// // 计算容器数量的增率 +// let container_inc_rate = temp_change.abs() / threshold; + +// // 统计目前已有的函数实例数量 +// let mut fn_instance_cnt = 0; + +// // 统计目前可分配的实例数量 +// let mut idle_fn_instance_cnt = 0; + +// // 取出属于该函数的所有容器快照 +// env.fn_containers_for_each(fnid, |container| { +// // 创建一个该函数的实例需要的内存、该容器所在的节点 +// let fn_mem = env.core().fns().get(fnid).unwrap().mem; +// let node = env.node(container.node_id); + +// // 累加所有容器上的已有的函数实例数量得到总的函数实例数量 +// fn_instance_cnt += ((container.last_frame_mem - CONTAINER_BASIC_MEM) / +// fn_mem) as i32; + +// // 累加容器节点上空闲可分配的实例数量,但是这些可分配的内存是公用的,每个函数平分剩余的空闲内存 +// idle_fn_instance_cnt += ( +// (node.rsc_limit.mem - node.last_frame_mem) / +// (fn_mem * (node.fn_containers.borrow().len() as f32)) +// ).floor() as i32; +// }); + +// // 决策扩容 +// if temp_change > 0.0 { +// // MARK 该增量的计算方式与论文中所写的不一致,后续有时间应该进一步实验测试对比一下现计算方式和论文中所写计算方式的优劣 +// // 根据温度增量计算容器数量的增量 +// let container_change = ( +// (fn_instance_cnt as f64) * +// (container_inc_rate - 1.0) +// ).ceil() as i32; + +// // 如果所需要的实例数量大于空闲的实例数量,则进行扩容 +// if container_change >= idle_fn_instance_cnt { 
+// // 标记这一帧用温度策略决定扩缩容 +// scale_sign = true; + +// // 更新温度扩缩容记录 +// self.fn_temp_scale_sign.insert(fnid, current_frame); + +// self.decide_to_up_count += 1; + +// // 增加一个容器快照。其实应该严格按照应增加的实例数量来计算具体增加几个快照够,但是又涉及到在哪里进行扩容并计算数量的问题,该系统中实现很麻烦 +// desired_container_cnt += 1; +// } +// } else if +// // 决策缩容 +// desired_container_cnt > 1 +// { +// // 标记这一帧用温度策略决定扩缩容 +// scale_sign = true; + +// // 更新温度扩缩容记录 +// self.fn_temp_scale_sign.insert(fnid, current_frame); + +// // 记录缩容次数 +// self.decide_to_down_count += 1; + +// // 减少一个容器快照 +// desired_container_cnt -= 1; +// } +// } +// } +// // ---------------------------------------------------------------------------------------- + +// // 设置机制来处理 温度感知器没反应,但是函数在持续缓慢升温/降温的情况----------------------------------------------------- +// // 获取当前函数的所有容器,计算平均cpu、mem利用率 +// if !scale_sign && cur_container_cnt != 0 { +// let mut container_avg_cpu_util = 0.0; +// let mut container_avg_mem_util = 0.0; + +// env.fn_containers_for_each(fnid, |container| { +// // 统计cpu、mem情况 +// container_avg_cpu_util += container.cpu_use_rate(); + +// container_avg_mem_util += +// container.last_frame_mem / +// (env.node(container.node_id).left_mem() + container.last_frame_mem); +// }); +// // 计算平均 +// container_avg_mem_util /= cur_container_cnt as f32; +// container_avg_cpu_util /= cur_container_cnt as f32; + +// // 如果有一个大于80%,则进行扩容 +// if container_avg_mem_util > 0.8 || container_avg_cpu_util > 0.8 { +// self.resource_decide_to_up_count += 1; +// desired_container_cnt += 1; +// } +// } +// // ---------------------------------------------------------------------------------------- + +// // 先取出该函数的最后一次的调用时间 +// let mut last_call_frame = 0; +// match self.fn_call_history.get(&fnid).unwrap().borrow().back() { +// Some(last_call)=>{ +// last_call_frame = last_call.frame; +// }, +// None=>{ +// } +// } + +// // 对于容器数量为0的函数,如果最后一次调用距离现在的长度小于历史调用窗口长度,则变为一个容器 +// if desired_container_cnt == 0 && last_call_frame + self.call_history_window_len >= current_frame { +// desired_container_cnt = 1; +// } +// // 对于容器数量是1的函数,如果最后一次调用距离现在的长度大于历史调用窗口长度,则缩容为0个容器 +// else if desired_container_cnt == 1 && last_call_frame + self.call_history_window_len < current_frame { +// desired_container_cnt = 0; +// } + +// // log::info!("函数:{}, 在第{}帧的目标容器数量为:{}.scale_for_fn()结束", fnid, current_frame, desired_container_cnt); + +// // log::info!("扩缩容器决策升温 {} 次", self.decide_to_up_count); +// // log::info!("扩缩容器决策降温 {} 次", self.decide_to_down_count); +// // log::info!("mem决策升温 {} 次", self.mem_decide_to_up_count); + +// desired_container_cnt +// } +// } diff --git a/serverless_sim/src/scale/up_exec/least_task.rs b/serverless_sim/src/scale/up_exec/least_task.rs index 5b01835..9f669ab 100644 --- a/serverless_sim/src/scale/up_exec/least_task.rs +++ b/serverless_sim/src/scale/up_exec/least_task.rs @@ -1,9 +1,11 @@ -use super::ScaleUpExec; +use super::ScaleUpExec; +use crate::cache::flame::{global_fn_score, global_is_hot}; +use crate::cache::rbuc::global_replica_delta; use crate::mechanism_thread::{MechCmdDistributor, MechScheduleOnceRes}; use crate::node::EnvNodeExt; use crate::with_env_sub::WithEnvHelp; use crate::{ - fn_dag::FnId, + fn_dag::{EnvFnExt, FnId}, mechanism::{SimEnvObserve, UpCmd}, }; @@ -35,24 +37,95 @@ impl ScaleUpExec for LeastTaskScaleUpExec { let nodes_with_container_cnt = env.nodes().len() - nodes_no_container.len(); - // log::info!("nodes_no_container.len(): {}", nodes_no_container.len()); - // MARK 修复了一个扩容bug - if nodes_with_container_cnt < target_cnt && nodes_no_container.len() > 0 { + if 
nodes_with_container_cnt < target_cnt && !nodes_no_container.is_empty() { let to_scale_up_cnt = std::cmp::min( target_cnt - nodes_with_container_cnt, nodes_no_container.len(), ); - // 对不含容器的节点按照其所有任务数量进行降序排序 - nodes_no_container.sort_by(|&a, &b| { - let acnt = mech_metric().node_task_new_cnt(a); - let bcnt = mech_metric().node_task_new_cnt(b); - acnt.partial_cmp(&bcnt).unwrap() - }); - // 反转,即优先选择任务数量最少的节点进行预加载 - nodes_no_container.reverse(); + + let cache_policy = env.help().config().mech.instance_cache_policy_conf().0; + if cache_policy == "flame" && global_is_hot(fnid) { + let need_mem = env + .func(fnid) + .cold_start_container_mem_use + .max(env.func(fnid).container_mem()); + nodes_no_container.sort_by(|&a, &b| { + let flame_priority = |nid| { + let node = env.node(nid); + let reclaimable_temp_mem = node + .fn_containers + .borrow() + .iter() + .filter_map(|(fid, container)| { + if container.is_idle() && !global_is_hot(*fid) { + Some(env.func(*fid).container_mem()) + } else { + None + } + }) + .sum::(); + let available_mem = + node.left_mem_for_place_container() + reclaimable_temp_mem; + let hot_score = node + .fn_containers + .borrow() + .keys() + .filter(|fid| global_is_hot(**fid)) + .map(|fid| global_fn_score(*fid)) + .sum::(); + let priority = if available_mem + 0.00001 < need_mem { + -1.0 + } else if hot_score <= 0.00001 { + available_mem + } else { + available_mem / hot_score + }; + (priority, available_mem) + }; + let ap = flame_priority(a); + let bp = flame_priority(b); + ap.0.partial_cmp(&bp.0) + .unwrap_or(std::cmp::Ordering::Equal) + .then_with(|| ap.1.partial_cmp(&bp.1).unwrap_or(std::cmp::Ordering::Equal)) + }); + } else if cache_policy == "rbuc" { + let replica_delta = global_replica_delta(fnid); + nodes_no_container.sort_by(|&a, &b| { + let a_mem = env.node(a).left_mem_for_place_container(); + let b_mem = env.node(b).left_mem_for_place_container(); + let a_tasks = mech_metric().node_task_new_cnt(a); + let b_tasks = mech_metric().node_task_new_cnt(b); + + if replica_delta > 0 { + a_mem + .partial_cmp(&b_mem) + .unwrap_or(std::cmp::Ordering::Equal) + .then_with(|| b_tasks.cmp(&a_tasks)) + } else { + b_tasks + .cmp(&a_tasks) + .then_with(|| { + a_mem + .partial_cmp(&b_mem) + .unwrap_or(std::cmp::Ordering::Equal) + }) + } + }); + } else { + // Keep original behavior: prefer nodes with fewer tasks. 
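Note on the flame-aware branch above: each candidate node is scored -1.0 when it cannot fit the container even after reclaiming idle non-hot containers, `available_mem` when it hosts no hot functions, and `available_mem / hot_score` otherwise; the list is sorted ascending and nodes are popped from the back, so the node offering the most room per unit of resident hot-function score is preloaded first. A toy standalone version of that scoring (node ids and numbers are illustrative, not simulator values):

// Priority used to rank candidate nodes for preloading a "hot" function.
fn flame_priority(available_mem: f32, hot_score: f32, need_mem: f32) -> f32 {
    if available_mem + 0.00001 < need_mem {
        -1.0 // cannot fit the new container at all
    } else if hot_score <= 0.00001 {
        available_mem // no hot functions resident: plain free-memory ranking
    } else {
        available_mem / hot_score // prefer nodes that disturb hot functions least
    }
}

fn main() {
    let need_mem = 300.0;
    // (node id, available_mem, hot_score)
    let mut nodes = vec![(0, 200.0, 0.0), (1, 800.0, 4.0), (2, 500.0, 0.5)];
    nodes.sort_by(|a, b| {
        flame_priority(a.1, a.2, need_mem)
            .partial_cmp(&flame_priority(b.1, b.2, need_mem))
            .unwrap()
    });
    // Popping from the back picks node 2: enough room and few hot functions to crowd.
    assert_eq!(nodes.pop().map(|n| n.0), Some(2));
}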
+ nodes_no_container.sort_by(|&a, &b| { + let acnt = mech_metric().node_task_new_cnt(a); + let bcnt = mech_metric().node_task_new_cnt(b); + acnt.partial_cmp(&bcnt).unwrap() + }); + } + + if cache_policy != "rbuc" && cache_policy != "flame" { + nodes_no_container.reverse(); + } for _ in 0..to_scale_up_cnt { let node_2_load_contaienr = nodes_no_container.pop().unwrap(); - cmd_distributor.send(MechScheduleOnceRes::ScaleUpCmd(UpCmd { + let _ = cmd_distributor.send(MechScheduleOnceRes::ScaleUpCmd(UpCmd { nid: node_2_load_contaienr, fnid, })); diff --git a/serverless_sim/src/sche/bcws.rs b/serverless_sim/src/sche/bcws.rs new file mode 100644 index 0000000..1799a3a --- /dev/null +++ b/serverless_sim/src/sche/bcws.rs @@ -0,0 +1,265 @@ +use std::collections::HashMap; + +use daggy::Walker; + +use crate::{ + fn_dag::{DagId, EnvFnExt, FnDAG, FnId}, + mechanism::{MechType, MechanismImpl, ScheCmd, SimEnvObserve}, + mechanism_thread::{MechCmdDistributor, MechScheduleOnceRes}, + node::{EnvNodeExt, NodeId}, + request::Request, + sim_run::Scheduler, + with_env_sub::WithEnvCore, +}; + +pub struct BCWSScheduler { + dag_fn_ranks: HashMap>, +} + +impl BCWSScheduler { + pub fn new() -> Self { + Self { + dag_fn_ranks: HashMap::new(), + } + } + + fn prepare_rank_for_dag(&mut self, req: &Request, env: &SimEnvObserve) { + if self.dag_fn_ranks.contains_key(&req.dag_i) { + return; + } + + let dag = env.dag(req.dag_i).clone(); + let avg_node_cpu = { + let nodes = env.core().nodes(); + let total_cpu = nodes.iter().map(|node| node.rsc_limit.cpu).sum::(); + (total_cpu / (nodes.len().max(1) as f32)).max(0.0001) + }; + let min_bandwidth = env.node_btw_get_lowest().max(0.0001); + + let mut ranks = HashMap::::new(); + let mut topo_order = Vec::new(); + let mut walker = dag.new_dag_walker(); + + while let Some(func_g_i) = walker.next(&dag.dag_inner) { + topo_order.push(func_g_i); + let fnid = dag.dag_inner[func_g_i]; + let compute_cost = env.func(fnid).cpu / avg_node_cpu; + ranks.insert(fnid, compute_cost); + } + + while let Some(func_g_i) = topo_order.pop() { + let fnid = dag.dag_inner[func_g_i]; + let mut max_succ_cost: f32 = 0.0; + + for (edge, child_g_i) in dag.dag_inner.children(func_g_i).iter(&dag.dag_inner) { + let child_fnid = dag.dag_inner[child_g_i]; + let transfer_cost = dag + .dag_inner + .edge_weight(edge) + .copied() + .unwrap_or(0.0) + .max(0.0) + / min_bandwidth; + let child_rank = *ranks.get(&child_fnid).unwrap_or(&0.0); + max_succ_cost = max_succ_cost.max(transfer_cost + child_rank); + } + + if let Some(rank) = ranks.get_mut(&fnid) { + *rank += max_succ_cost; + } + } + + self.dag_fn_ranks.insert(req.dag_i, ranks); + } + + fn schedulable_fns( + &self, + req: &Request, + dag: &FnDAG, + env: &SimEnvObserve, + planned_nodes: &HashMap, + ) -> Vec { + let mut ready = Vec::new(); + let mut walker = dag.new_dag_walker(); + + 'next_fn: while let Some(func_g_i) = walker.next(&dag.dag_inner) { + let fnid = dag.dag_inner[func_g_i]; + if planned_nodes.contains_key(&fnid) { + continue; + } + + for parent in env.func(fnid).parent_fns(env) { + if !planned_nodes.contains_key(&parent) && req.get_fn_node(parent).is_none() { + continue 'next_fn; + } + } + + ready.push(fnid); + } + + ready + } + + fn candidate_nodes( + &self, + fnid: FnId, + env: &SimEnvObserve, + mech: &MechanismImpl, + ) -> Vec { + let mut nodes = match mech.mech_type() { + MechType::ScaleScheSeparated => env + .core() + .fn_2_nodes() + .get(&fnid) + .map(|node_ids| node_ids.iter().copied().collect::>()) + .unwrap_or_default(), + _ => Vec::new(), + }; + + if 
nodes.is_empty() { + nodes = env.core().nodes().iter().map(|node| node.node_id()).collect(); + } + + nodes + } + + fn estimate_finish_time( + &self, + fnid: FnId, + candidate_node: NodeId, + env: &SimEnvObserve, + planned_nodes: &HashMap, + node_loads: &HashMap, + ) -> f32 { + let func = env.func(fnid); + let node = env.node(candidate_node); + let compute_time = func.cpu / node.rsc_limit.cpu.max(0.0001); + let queue_delay = (*node_loads.get(&candidate_node).unwrap_or(&0) as f32) * compute_time; + + let mut transfer_ready_time: f32 = 0.0; + for parent in env.func(fnid).parent_fns(env) { + let Some(parent_node) = planned_nodes.get(&parent).copied() else { + continue; + }; + if parent_node == candidate_node { + continue; + } + + let parent_output = env.func(parent).out_put_size; + let bandwidth = env.node_get_speed_btwn(parent_node, candidate_node).max(0.0001); + transfer_ready_time = transfer_ready_time.max(parent_output / bandwidth); + } + + transfer_ready_time + queue_delay + compute_time + } + + fn select_node_for_fn( + &self, + fnid: FnId, + env: &SimEnvObserve, + mech: &MechanismImpl, + planned_nodes: &HashMap, + node_loads: &HashMap, + ) -> NodeId { + let candidates = self.candidate_nodes(fnid, env, mech); + let mut best = None::<(f32, usize, NodeId)>; + + for candidate in candidates { + let finish_time = + self.estimate_finish_time(fnid, candidate, env, planned_nodes, node_loads); + let node_tasks = *node_loads.get(&candidate).unwrap_or(&0); + let score = (finish_time, node_tasks, candidate); + + if let Some(cur_best) = best { + if score < cur_best { + best = Some(score); + } else { + best = Some(cur_best); + } + } else { + best = Some(score); + } + } + + best.map(|(_, _, node_id)| node_id).unwrap_or(0) + } + + fn schedule_for_one_req( + &mut self, + req: &Request, + env: &SimEnvObserve, + mech: &MechanismImpl, + cmd_distributor: &MechCmdDistributor, + ) { + self.prepare_rank_for_dag(req, env); + + let dag = env.dag(req.dag_i).clone(); + let dag_ranks = self.dag_fn_ranks.get(&req.dag_i).unwrap(); + let mut planned_nodes = req.fn_node.clone(); + let mut node_loads = env + .core() + .nodes() + .iter() + .map(|node| (node.node_id(), node.all_task_cnt())) + .collect::>(); + + loop { + let mut ready = self.schedulable_fns(req, &dag, env, &planned_nodes); + if ready.is_empty() { + break; + } + + ready.sort_by(|a, b| { + let rank_a = *dag_ranks.get(a).unwrap_or(&0.0); + let rank_b = *dag_ranks.get(b).unwrap_or(&0.0); + rank_b + .total_cmp(&rank_a) + .then_with(|| { + let cpu_a = env.func(*a).cpu; + let cpu_b = env.func(*b).cpu; + cpu_b.total_cmp(&cpu_a) + }) + .then_with(|| a.cmp(b)) + }); + + for fnid in ready { + if planned_nodes.contains_key(&fnid) { + continue; + } + + let node_id = + self.select_node_for_fn(fnid, env, mech, &planned_nodes, &node_loads); + planned_nodes.insert(fnid, node_id); + node_loads + .entry(node_id) + .and_modify(|count| *count += 1) + .or_insert(1); + + cmd_distributor + .send(MechScheduleOnceRes::ScheCmd(ScheCmd { + nid: node_id, + reqid: req.req_id, + fnid, + memlimit: None, + })) + .unwrap(); + } + } + } +} + +impl Scheduler for BCWSScheduler { + fn schedule_some( + &mut self, + env: &SimEnvObserve, + mech: &MechanismImpl, + cmd_distributor: &MechCmdDistributor, + ) { + for (_, req) in env.core().requests().iter() { + if req.fn_node.len() == req.fn_count(env) { + continue; + } + self.schedule_for_one_req(req, env, mech, cmd_distributor); + } + } +} diff --git a/serverless_sim/src/sche/bp_balance.rs b/serverless_sim/src/sche/bp_balance.rs index a7a5728..e4f0fc3 
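Note on `BCWSScheduler::prepare_rank_for_dag` above: it computes a HEFT-style upward rank, `rank(f) = cpu_f / avg_node_cpu + max over children (edge_weight / min_bandwidth + rank(child))`, and ready tasks are then scheduled in descending rank, with `estimate_finish_time` (parent transfer time + queue delay + compute time) picking the node. A standalone recursive version of the rank recurrence on a toy three-function DAG; all costs are made up for illustration:

use std::collections::HashMap;

// Upward rank: rank(f) = compute_cost(f) + max over children (transfer_cost + rank(child)).
fn upward_rank(
    f: usize,
    compute: &HashMap<usize, f32>,
    children: &HashMap<usize, Vec<(usize, f32)>>, // child fn id, transfer cost
    memo: &mut HashMap<usize, f32>,
) -> f32 {
    if let Some(r) = memo.get(&f) {
        return *r;
    }
    let mut succ = 0.0_f32;
    if let Some(edges) = children.get(&f) {
        for &(child, transfer) in edges {
            succ = succ.max(transfer + upward_rank(child, compute, children, memo));
        }
    }
    let r = compute[&f] + succ;
    memo.insert(f, r);
    r
}

fn main() {
    // Toy DAG: 0 -> 1 -> 2 and 0 -> 2.
    let compute: HashMap<usize, f32> = [(0, 1.0), (1, 2.0), (2, 1.5)].into_iter().collect();
    let children: HashMap<usize, Vec<(usize, f32)>> =
        [(0, vec![(1, 0.5), (2, 0.2)]), (1, vec![(2, 0.3)])].into_iter().collect();
    let mut memo = HashMap::new();
    let rank0 = upward_rank(0, &compute, &children, &mut memo);
    // rank(2) = 1.5, rank(1) = 2 + 0.3 + 1.5 = 3.8, rank(0) = 1 + max(0.5 + 3.8, 0.2 + 1.5) = 5.3
    assert!((rank0 - 5.3).abs() < 1e-4);
    // Upstream tasks carry the highest rank, so they are scheduled first.
    assert!(rank0 > memo[&1] && memo[&1] > memo[&2]);
}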
100644 --- a/serverless_sim/src/sche/bp_balance.rs +++ b/serverless_sim/src/sche/bp_balance.rs @@ -123,7 +123,7 @@ impl BpBalanceScheduler { let binpack = self.binpack_map.get(&fnid).unwrap(); - assert!(binpack.len() != 0 && self.latest_nodes.get(&fnid).unwrap().len() != 0); + // assert!(binpack.len() != 0 && self.latest_nodes.get(&fnid).unwrap().len() != 0); let mut avg_cpu_starve_degree = 0.0; let mut avg_mem_use_rate = 0.0; @@ -249,7 +249,7 @@ impl BpBalanceScheduler { } - if self.binpack_map.get(&fnid).unwrap().len() == 0 { + if self.binpack_map.get(&fnid).unwrap().len() == 0 && fn_scale_up_cmds.len() != 0 { panic!("fnid:{}, last_nodes_len:{}", fnid, self.latest_nodes.get(&fnid).unwrap().len()); } @@ -381,7 +381,7 @@ impl Scheduler for BpBalanceScheduler { } } // 如果需要缩容 - else if target < cur && (cur != 1 || !self.need_schedule_fn.contains(&func.fn_id)) { + else if target < cur && (cur > 1 || !self.need_schedule_fn.contains(&func.fn_id)) { // 标记可以开始bp机制 if self.mech_impl_sign.get(&func.fn_id).unwrap() == &false { log::info!("fn_id: {}, 在第 {} 帧触发机制", func.fn_id, env.core().current_frame()); @@ -432,7 +432,7 @@ impl Scheduler for BpBalanceScheduler { // 该函数没有可调度节点,表示该函数最近一直没有请求,直接跳过 if nodes.len() == 0 { self.latest_nodes.insert(func.fn_id, nodes.clone()); - assert!(binpack.len() == 0); + // assert!(binpack.len() == 0); continue; } @@ -440,14 +440,14 @@ impl Scheduler for BpBalanceScheduler { let binpack = self.binpack_map.get(&func.fn_id).unwrap(); // 如果扩缩容器没有缩容,那么遍历每个容器,对binpack数组外的容器进行超时缩容------------------------------------------ - if scale_down_sign == false { + if /* scale_down_sign == false */ true { env.fn_containers_for_each(func.fn_id, |container| { // 对于不是binpack数组中的节点,进行超时缩容 if !binpack.contains(&container.node_id) { // 如果该容器最近50帧都是空闲则缩容 - if container.recent_frame_is_idle(50) && container.req_fn_state.len() == 0 { + if container.recent_frame_is_idle(20) && container.req_fn_state.len() == 0 { // 发送缩容命令 cmd_distributor @@ -468,7 +468,7 @@ impl Scheduler for BpBalanceScheduler { // 超时缩容完成---------------------------------------------------------------------------------------- // 到这里来的时候nodes集合一定不为空 - assert!(nodes.len() != 0); + // assert!(nodes.len() != 0); // 更新该函数的最新可调度节点集合 self.latest_nodes.insert(func.fn_id, nodes.clone()); @@ -479,7 +479,7 @@ impl Scheduler for BpBalanceScheduler { // 当binpack数组为空时,把所有节点都加进去 if binpack.len() == 0 { self.binpack_map.insert(func.fn_id, nodes.clone()); - assert!(self.binpack_map.get(&func.fn_id).unwrap().len() != 0); + // assert!(self.binpack_map.get(&func.fn_id).unwrap().len() != 0); } // 计算该函数binpack数组内的资源利用率,以及得出其内、其外的空闲资源最多的节点id @@ -490,7 +490,7 @@ impl Scheduler for BpBalanceScheduler { let binpack = self.binpack_map.get(&func.fn_id).unwrap(); - assert!(bplist_resource_status.avg_cpu_starve_degree != 0.0); + // assert!(bplist_resource_status.avg_cpu_starve_degree != 0.0); // 退出循环逻辑 if bplist_resource_status.avg_cpu_starve_degree < CPU_THRESHOLD_TO_REMOVE && binpack.len() == 1{ diff --git a/serverless_sim/src/sche/ensure_scheduler.rs b/serverless_sim/src/sche/ensure_scheduler.rs index 7e0d703..5931c02 100644 --- a/serverless_sim/src/sche/ensure_scheduler.rs +++ b/serverless_sim/src/sche/ensure_scheduler.rs @@ -4,12 +4,7 @@ use std::{ }; use crate::{ - fn_dag::{EnvFnExt, FnId}, - mechanism::{DownCmd, MechType, MechanismImpl, ScheCmd, SimEnvObserve}, - mechanism_thread::{MechCmdDistributor, MechScheduleOnceRes}, - node::{self, EnvNodeExt, Node, NodeId}, - sim_run::{schedule_helper, Scheduler}, - with_env_sub::WithEnvCore, + 
diff --git a/serverless_sim/src/sche/ensure_scheduler.rs b/serverless_sim/src/sche/ensure_scheduler.rs
index 7e0d703..5931c02 100644
--- a/serverless_sim/src/sche/ensure_scheduler.rs
+++ b/serverless_sim/src/sche/ensure_scheduler.rs
@@ -4,12 +4,7 @@ use std::{
 };
 use crate::{
-    fn_dag::{EnvFnExt, FnId},
-    mechanism::{DownCmd, MechType, MechanismImpl, ScheCmd, SimEnvObserve},
-    mechanism_thread::{MechCmdDistributor, MechScheduleOnceRes},
-    node::{self, EnvNodeExt, Node, NodeId},
-    sim_run::{schedule_helper, Scheduler},
-    with_env_sub::WithEnvCore,
+    fn_dag::{EnvFnExt, FnId}, mechanism::{DownCmd, MechType, MechanismImpl, ScheCmd, SimEnvObserve}, mechanism_thread::{MechCmdDistributor, MechScheduleOnceRes}, node::{self, EnvNodeExt, Node, NodeId}, sche, sim_run::{schedule_helper, Scheduler}, with_env_sub::{WithEnvCore, WithEnvHelp}
 };
 struct NodeCpuResc {
@@ -88,7 +83,7 @@ impl Scheduler for EnsureScheduler {
             let schedule_able_fns = schedule_helper::collect_task_to_sche(
                 req,
                 env,
-                schedule_helper::CollectTaskConfig::PreAllSched,
+                schedule_helper::CollectTaskConfig::All,
             );
             for fnid in schedule_able_fns.iter() {
                 need_schedule_fn.insert(*fnid);
             }
@@ -117,11 +112,23 @@ impl Scheduler for EnsureScheduler {
                     nodes.insert(cmd.nid);
                 }
             }
+            else if target == 0 && need_schedule_fn.contains(&func.fn_id) {
+                let up_cmd = mech.scale_up_exec().exec_scale_up(
+                    1,
+                    func.fn_id, env,
+                    cmd_distributor
+                );
+
+                // Update the function's node set in real time
+                for cmd in up_cmd.iter() {
+                    nodes.insert(cmd.nid);
+                }
+            }
             if !need_schedule_fn.contains(&func.fn_id) {
                 env.fn_containers_for_each(func.fn_id, |container| {
                     // Scale down if the container has been idle for the last 50 frames
-                    if container.recent_frame_is_idle(50) && container.req_fn_state.len() == 0 {
+                    if container.recent_frame_is_idle(20) && container.req_fn_state.len() == 0 {
                        // Send the scale-down command
                        cmd_distributor
                            .send(MechScheduleOnceRes::ScaleDownCmd(DownCmd
@@ -136,7 +143,7 @@ impl Scheduler for EnsureScheduler {
                 });
             }
-            log::info!("fn {}, nodes.len() = {}", func.fn_id, nodes.len());
+            // log::info!("fn {}, nodes.len() = {}", func.fn_id, nodes.len());
             self.fn_nodes.insert(func.fn_id, nodes.clone());
         }
@@ -149,12 +156,18 @@ impl Scheduler for EnsureScheduler {
            // Iterate over the functions in the request and pick the most suitable node for each
            for fnid in fns {
-                let sche_nodeid = self.select_best_node_to_fn(fnid, env);
+                let nodes = self.fn_nodes.get(&fnid).unwrap();
-                log::info!("schedule fn {} to node {}", fnid, sche_nodeid);
+                let mut sche_nodeid = self.select_best_node_to_fn(fnid, env);
-                if sche_nodeid != 9999 {
-                    cmd_distributor
+                log::info!("schedule fn {} to node {}. nodes.len() = {}", fnid, sche_nodeid, nodes.len());
+
+                if sche_nodeid == 9999 {
+                    assert!(nodes.len() == 0);
+                    sche_nodeid = env.core().current_frame() % env.core().nodes().len();
+                }
+
+                cmd_distributor
                    .send(MechScheduleOnceRes::ScheCmd(ScheCmd {
                        nid: sche_nodeid,
                        reqid: req.req_id,
@@ -162,8 +175,8 @@ impl Scheduler for EnsureScheduler {
                        memlimit: None,
                    }))
                    .unwrap();
-                self.node_cpu_usage.get_mut(&sche_nodeid).unwrap().all_task_cnt += 1.0;
-                }
+                self.node_cpu_usage.get_mut(&sche_nodeid).unwrap().all_task_cnt += 1.0;
+            }
            }
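
Both this EnsureScheduler change and the load_least change that follows replace the old "skip if no node was found" behavior with a fallback: `select_best_node_to_fn` signals "no schedulable node" with the sentinel value 9999, and the patch then picks a node by round-robin over the current frame so the request still gets placed. The helper below is an illustrative sketch of that rule; the constant and function names are not from the simulator.

    // Illustrative only: names are stand-ins for the pattern used in the hunks above and below.
    const NO_NODE_SENTINEL: usize = 9999;

    fn resolve_node(best: usize, current_frame: usize, node_count: usize) -> usize {
        if best == NO_NODE_SENTINEL {
            // Spread otherwise-unplaceable functions across nodes, one per frame.
            current_frame % node_count
        } else {
            best
        }
    }

As a design note, returning `Option<NodeId>` instead of a magic 9999 would make the "no candidate" case explicit in the type system; the sketch keeps the sentinel only to match the surrounding code.
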
diff --git a/serverless_sim/src/sche/load_least.rs b/serverless_sim/src/sche/load_least.rs
index 0a1f4d4..9df1a50 100644
--- a/serverless_sim/src/sche/load_least.rs
+++ b/serverless_sim/src/sche/load_least.rs
@@ -79,12 +79,15 @@ impl Scheduler for LoadLeastScheduler {
            // Iterate over the functions in the request and pick the most suitable node for each
            for fnid in fns {
-                let sche_nodeid = self.select_best_node_to_fn(fnid, env);
+                let mut sche_nodeid = self.select_best_node_to_fn(fnid, env);
                log::info!("schedule fn {} to node {}", fnid, sche_nodeid);
-                if sche_nodeid != 9999 {
-                    cmd_distributor
+                if sche_nodeid == 9999 {
+                    assert!(self.fn_nodes.get(&fnid).unwrap().len() == 0);
+                    sche_nodeid = env.core().current_frame() % env.core().nodes().len();
+                }
+                cmd_distributor
                    .send(MechScheduleOnceRes::ScheCmd(ScheCmd {
                        nid: sche_nodeid,
                        reqid: req.req_id,
@@ -93,9 +96,8 @@ impl Scheduler for LoadLeastScheduler {
                    }))
                    .unwrap();
-                let tasks_cnt = self.node_cpu_usage.get(&sche_nodeid).unwrap();
-                self.node_cpu_usage.insert(sche_nodeid, tasks_cnt + 1);
-                }
+                let tasks_cnt = self.node_cpu_usage.get(&sche_nodeid).unwrap();
+                self.node_cpu_usage.insert(sche_nodeid, tasks_cnt + 1);
            }
        }
diff --git a/serverless_sim/src/sche/mod.rs b/serverless_sim/src/sche/mod.rs
index 3973168..a796baa 100644
--- a/serverless_sim/src/sche/mod.rs
+++ b/serverless_sim/src/sche/mod.rs
@@ -3,6 +3,7 @@ use bp_balance::BpBalanceScheduler;
 use crate::{ config::Config, sim_run::Scheduler };
 use self::{
+    bcws::BCWSScheduler,
     consistenthash::ConsistentHashScheduler,
     // rule_based::{RuleBasedScheduler, ScheduleRule},
     // time_aware::TimeScheduler,
     faasflow::FaasFlowScheduler,
@@ -25,6 +26,7 @@ pub mod pass;
 pub mod pos;
 pub mod random;
 pub mod bp_balance;
+pub mod bcws;
 pub mod hash;
 pub mod rotate;
 pub mod ensure_scheduler;
@@ -61,6 +63,9 @@ pub fn prepare_spec_scheduler(config: &Config) -> Option {
            return Some(Box::new(BpBalanceScheduler::new()));
        }
+        "bcws" => {
+            return Some(Box::new(BCWSScheduler::new()));
+        }
        "consistenthash" => {
            return Some(Box::new(ConsistentHashScheduler::new()));
        }
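
The mod.rs hunk above wires the new scheduler into the factory: the scheduler name from the config is matched against known strings and a boxed trait object is returned. The sketch below shows that registration pattern with simplified stand-in types; the real `prepare_spec_scheduler` takes a `&Config` and the real `Scheduler` trait has scheduling methods.

    // Simplified registration sketch; trait and constructor are stand-ins, not the
    // simulator's real signatures.
    trait Scheduler {}

    struct BCWSScheduler;

    impl BCWSScheduler {
        fn new() -> Self {
            BCWSScheduler
        }
    }

    impl Scheduler for BCWSScheduler {}

    fn prepare_spec_scheduler(name: &str) -> Option<Box<dyn Scheduler>> {
        match name {
            "bcws" => Some(Box::new(BCWSScheduler::new())),
            _ => None,
        }
    }

Besides the factory arm, a new scheduler also needs the `pub mod bcws;` declaration and the `bcws::BCWSScheduler` import shown in the hunks above.
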
diff --git a/serverless_sim/src/sche/pass.rs b/serverless_sim/src/sche/pass.rs
index 4fef07f..73e778e 100644
--- a/serverless_sim/src/sche/pass.rs
+++ b/serverless_sim/src/sche/pass.rs
@@ -1,4 +1,4 @@
-use std::collections::HashMap;
+use std::collections::{HashMap, HashSet};
 use daggy::Walker;
 use rand::Rng;
@@ -90,19 +90,44 @@ impl PassScheduler {
        env: &SimEnvObserve,
    ) {
        let func = env.func(func_id);
+
+        // let mut nodes = HashSet::new();
+        let nodes = env.core().nodes();
+        let nodes_sche = env
+            .core()
+            .fn_2_nodes()
+            .get(&func.fn_id)
+            .map(|v| v.clone().into_iter().collect())
+            .unwrap_or_else(Vec::new);
+
+        let mut nodes_len = 0;
+
+        if nodes_sche.len() == 0 {
+            nodes_len = nodes.len();
+        }
+        else {
+            nodes_len = nodes_sche.len();
+        }
        let func_pres_id = func.parent_fns(env);
        log::info!("func {} pres {:?}", func_id, func_pres_id);
        if func_pres_id.len() == 0 {
            let mut rng = rand::thread_rng();
-            let rand = rng.gen_range(0..nodes.len());
+            let rand = rng.gen_range(0..nodes_len);
+            let mut sche_nodeid = rand;
+
+            if nodes_sche.len() != 0 {
+                sche_nodeid = nodes_sche[rand];
+            }
+
+            // let rand = rng.gen_range(0..nodes.len());
            schedule_to_map.insert(func_id, rand);
            // schedule_to.push((func_id, rand));
            cmd_distributor
                .send(MechScheduleOnceRes::ScheCmd(ScheCmd {
-                    nid: rand,
+                    nid: sche_nodeid,
                    reqid: req.req_id,
                    fnid: func_id,
                    memlimit: None,
diff --git a/serverless_sim/src/sche/pos.rs b/serverless_sim/src/sche/pos.rs
index e88a532..5844197 100644
--- a/serverless_sim/src/sche/pos.rs
+++ b/serverless_sim/src/sche/pos.rs
@@ -124,11 +124,11 @@ impl PosScheduler {
        //     env.func(a).cpu.partial_cmp(&env.func(b).cpu).unwrap().reverse()
        // });
-        log::info!(
-            "schedule_some sort fns cost {}",
-            // req.req_id,
-            util::now_ms() - *mech.step_begin.borrow()
-        );
+        // log::info!(
+        //     "schedule_some sort fns cost {}",
+        //     // req.req_id,
+        //     util::now_ms() - *mech.step_begin.borrow()
+        // );
        let scale_up_exec = mech.scale_up_exec();
        let mech_metric = || env.help().mech_metric_mut();
@@ -149,12 +149,12 @@ impl PosScheduler {
        for cmd in fn_scale_up_cmds.iter() {
            self.record_new_scale_up_node(cmd.fnid, cmd.nid);
        }
-        log::info!(
-            "schedule_some schduling fn {} {}",
-            fnid,
-            // req.req_id,
-            util::now_ms() - *mech.step_begin.borrow()
-        );
+        // log::info!(
+        //     "schedule_some schduling fn {} {}",
+        //     fnid,
+        //     // req.req_id,
+        //     util::now_ms() - *mech.step_begin.borrow()
+        // );
        // Node selection: first pick out the nodes that already hold a container for this function
        let mut nodes2select = self.new_scale_up_nodes(fnid);
@@ -245,10 +245,10 @@ impl Scheduler for PosScheduler {
        for (_req_id, req) in env.core().requests().iter() {
            self.collect_scheable_fns_for_req(env, req);
        }
-        log::info!(
-            "schedule_some collect_scheable_fns_for_req cost {}",
-            util::now_ms() - *mech.step_begin.borrow()
-        );
+        // log::info!(
+        //     "schedule_some collect_scheable_fns_for_req cost {}",
+        //     util::now_ms() - *mech.step_begin.borrow()
+        // );
        // log::info!("try put fn");
        // let nodes_taskcnt = env
        //     .nodes()
@@ -267,21 +267,21 @@ impl Scheduler for PosScheduler {
                    cur - target,
                    cmd_distributor
                );
-                log::info!(
-                    "schedule_some scale_down_exec {} cost {}",
-                    func.fn_id,
-                    util::now_ms() - *mech.step_begin.borrow()
-                );
+                // log::info!(
+                //     "schedule_some scale_down_exec {} cost {}",
+                //     func.fn_id,
+                //     util::now_ms() - *mech.step_begin.borrow()
+                // );
            }
        }
        for r in &self.sche_queue {
            self.schedule_one_req_fns(env, mech, r, cmd_distributor);
-            log::info!(
-                "schedule_some schedule_one_req_fns {} cost {}",
-                r.0,
-                util::now_ms() - *mech.step_begin.borrow()
-            );
+            // log::info!(
+            //     "schedule_some schedule_one_req_fns {} cost {}",
+            //     r.0,
+            //     util::now_ms() - *mech.step_begin.borrow()
+            // );
        }
    }
 }
diff --git a/serverless_sim/src/sche/rotate.rs b/serverless_sim/src/sche/rotate.rs
index 3dbe36c..5b845ee 100644
--- a/serverless_sim/src/sche/rotate.rs
+++ b/serverless_sim/src/sche/rotate.rs
@@ -58,6 +58,7 @@ impl RotateScheduler {
        if !node_list.is_empty() {
            node_id = node_list[(self.last_schedule_node_id + 1) % node_list.len()];
+            self.last_schedule_node_id = node_id;
        }
        cmd_distributor