# SPDX-License-Identifier: GPL-2.0
import json
import argparse
import sys
import subprocess
from pathlib import Path


class Validator:
    def __init__(self, rulefname, reportfname='', t=5, debug=False, datafname='',
                 fullrulefname='', workload='true', metrics=''):
        self.rulefname = rulefname
        self.reportfname = reportfname
        self.rules = None
        self.collectlist: str = metrics
        self.metrics = self.__set_metrics(metrics)
        self.skiplist = set()
        self.tolerance = t

        self.workloads = [x for x in workload.split(",") if x]
        self.wlidx = 0  # index of the current workload
        self.allresults = dict()  # metric results of all workloads
        self.allignoremetrics = dict()  # metrics with no results or negative results
        self.allfailtests = dict()
        self.alltotalcnt = dict()
        self.allpassedcnt = dict()
        self.allerrlist = dict()

        self.results = dict()  # metric results of the current workload
        # vars for test pass/failure statistics
        self.ignoremetrics = set()  # metrics with no results or negative results; a negative result counts as a failed test
        self.failtests = dict()
        self.totalcnt = 0
        self.passedcnt = 0
        # vars for errors
        self.errlist = list()

        # vars for Rule Generator
        self.pctgmetrics = set()  # Percentage rule

        # vars for debug
        self.datafname = datafname
        self.debug = debug
        self.fullrulefname = fullrulefname

    def __set_metrics(self, metrics=''):
        if metrics:
            return set(metrics.split(","))
        else:
            return set()

    def read_json(self, filename: str) -> dict:
        try:
            with open(Path(filename).resolve(), "r") as f:
                data = json.loads(f.read())
        except OSError as e:
            print(f"Error when reading file {e}")
            sys.exit()

        return data

    def json_dump(self, data, output_file):
        parent = Path(output_file).parent
        if not parent.exists():
            parent.mkdir(parents=True)

        with open(output_file, "w+") as f:
            json.dump(data, f, indent=4)

    def get_results(self, idx: int = 0):
        return self.results[idx]

    def get_bounds(self, lb, ub, error, alias={}, ridx: int = 0) -> list:
        """
        Get bounds and tolerance from lb, ub, and error.
        If lb is missing, use 0.0; if ub is missing, use float('inf'); if error is missing, use self.tolerance.

        @param lb: str/float, lower bound
        @param ub: str/float, upper bound
        @param error: float/str, error tolerance
        @returns: lower bound; inf if the lower bound is a metric value and is not collected
                  upper bound; -1 if the upper bound is a metric value and is not collected
                  tolerance, denormalized based on the upper bound value
        """
        # init ubv and lbv to invalid values
        def get_bound_value(bound, initval, ridx):
            val = initval
            if isinstance(bound, int) or isinstance(bound, float):
                val = bound
            elif isinstance(bound, str):
                if bound == '':
                    val = float("inf")
                elif bound in alias:
                    vall = self.get_value(alias[bound], ridx)
                    if vall:
                        val = vall[0]
                elif bound.replace('.', '1').isdigit():
                    val = float(bound)
                else:
                    print("Wrong bound: {0}".format(bound))
            else:
                print("Wrong bound: {0}".format(bound))
            return val

        ubv = get_bound_value(ub, -1, ridx)
        lbv = get_bound_value(lb, float('inf'), ridx)
        t = get_bound_value(error, self.tolerance, ridx)

        # denormalize error threshold
        denormerr = t * ubv / 100 if ubv != 100 and ubv > 0 else t

        return lbv, ubv, denormerr
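
    # A worked example of the denormalization above (the numbers are made up):
    # for a rule with RangeLower=0, RangeUpper=30 and ErrorThreshold=5,
    # get_bounds(0, 30, 5) returns (0, 30, 1.5) since 5 * 30 / 100 = 1.5,
    # i.e. collected values in [0 - 1.5, 30 + 1.5] pass the rule.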

    def get_value(self, name: str, ridx: int = 0) -> list:
        """
        Get the value of the metric from self.results.
        If no result is provided for this metric, the metric name is added to
        self.ignoremetrics and self.errlist, and all future test(s) on this
        metric will fail.

        @param name: name of the metric
        @returns: list with the value found in self.results; the list is empty when the value is not found.
        """
        results = []
        data = self.results[ridx] if ridx in self.results else self.results[0]
        if name not in self.ignoremetrics:
            if name in data:
                results.append(data[name])
            elif name.replace('.', '1').isdigit():
                results.append(float(name))
            else:
                self.ignoremetrics.add(name)
        return results

    def check_bound(self, val, lb, ub, err):
        return lb - err <= val <= ub + err
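
    # For instance, with the denormalized bounds from the example above:
    #   check_bound(31.0, 0, 30, 1.5) -> True  (31.0 <= 30 + 1.5)
    #   check_bound(31.6, 0, 30, 1.5) -> False (31.6 >  30 + 1.5)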

    # Positive Value Sanity check
    def pos_val_test(self):
        """
        Check that metric values are non-negative.
        One metric is counted as one test.
        Failure: when the metric value is negative or not provided.
        Metrics with a negative value are added to self.failtests['PositiveValueTest'] and self.ignoremetrics.
        """
        negmetric = dict()
        pcnt = 0
        tcnt = 0
        rerun = list()
        for name, val in self.get_results().items():
            if val < 0:
                negmetric[name] = val
                rerun.append(name)
            else:
                pcnt += 1
            tcnt += 1
        # Give failing metrics a second chance with the longer workload when
        # fewer than 20 of them failed the first round.
        if len(rerun) > 0 and len(rerun) < 20:
            second_results = dict()
            self.second_test(rerun, second_results)
            for name, val in second_results.items():
                if name not in negmetric:
                    continue
                if val >= 0:
                    del negmetric[name]
                    pcnt += 1

        self.failtests['PositiveValueTest']['Total Tests'] = tcnt
        self.failtests['PositiveValueTest']['Passed Tests'] = pcnt
        if len(negmetric.keys()):
            self.ignoremetrics.update(negmetric.keys())
            negmessage = ["{0}(={1:.4f})".format(name, val) for name, val in negmetric.items()]
            self.failtests['PositiveValueTest']['Failed Tests'].append({'NegativeValue': negmessage})

        return

    def evaluate_formula(self, formula: str, alias: dict, ridx: int = 0):
        """
        Evaluate the value of a formula.

        @param formula: the formula to be evaluated
        @param alias: dict mapping aliases to metric names
        @returns: value of the formula on success; -1 if one or more metric values are not provided
        """
        stack, errs = [], []
        b, sign, f = 0, "+", str()

        # TODO: support parentheses?
        for i in range(len(formula)):
            if i + 1 == len(formula) or formula[i] in ('+', '-', '*', '/'):
                s = alias[formula[b:i]] if i + 1 < len(formula) else alias[formula[b:]]
                v = self.get_value(s, ridx)
                if not v:
                    errs.append(s)
                else:
                    f = f + "{0}(={1:.4f})".format(s, v[0])
                    if sign == "*":
                        stack[-1] = stack[-1] * v[0]
                    elif sign == "/":
                        stack[-1] = stack[-1] / v[0]
                    elif sign == '-':
                        stack.append(-v[0])
                    else:
                        stack.append(v[0])
                if i + 1 < len(formula):
                    sign = formula[i]
                    f += sign
                    b = i + 1

        if len(errs) > 0:
            return -1, "Metric value missing: " + ','.join(errs)

        val = sum(stack)
        return val, f
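
    # Illustrative trace (metric names and values are hypothetical): for
    # formula "a+b" with alias={'a': 'metric_a', 'b': 'metric_b'} and
    # collected values metric_a=1.5, metric_b=2.0, evaluate_formula() returns
    # (3.5, "metric_a(=1.5000)+metric_b(=2.0000)").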

    # Relationship Tests
    def relationship_test(self, rule: dict):
        """
        Validate that the metrics follow the relationship required by the rule,
        e.g. lower_bound <= eval(formula) <= upper_bound.
        One rule is counted as one test.
        Failure: when one or more metric result(s) are not provided, or when the formula evaluates outside the upper/lower bounds.

        @param rule: dict with metric name(+alias), formula, and the required upper and lower bounds.
        """
        alias = dict()
        for m in rule['Metrics']:
            alias[m['Alias']] = m['Name']
        lbv, ubv, t = self.get_bounds(rule['RangeLower'], rule['RangeUpper'],
                                      rule['ErrorThreshold'], alias, ridx=rule['RuleIndex'])
        val, f = self.evaluate_formula(rule['Formula'], alias, ridx=rule['RuleIndex'])
        if val == -1:
            self.failtests['RelationshipTest']['Failed Tests'].append({'RuleIndex': rule['RuleIndex'], 'Description': f})
        elif not self.check_bound(val, lbv, ubv, t):
            lb = rule['RangeLower']
            ub = rule['RangeUpper']
            if isinstance(lb, str):
                if lb in alias:
                    lb = alias[lb]
            if isinstance(ub, str):
                if ub in alias:
                    ub = alias[ub]
            self.failtests['RelationshipTest']['Failed Tests'].append({'RuleIndex': rule['RuleIndex'], 'Formula': f,
                                                                       'RangeLower': lb, 'LowerBoundValue': self.get_value(lb),
                                                                       'RangeUpper': ub, 'UpperBoundValue': self.get_value(ub),
                                                                       'ErrorThreshold': t, 'CollectedValue': val})
        else:
            self.passedcnt += 1
            self.failtests['RelationshipTest']['Passed Tests'] += 1
        self.totalcnt += 1
        self.failtests['RelationshipTest']['Total Tests'] += 1

        return
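
    # Example rule shape consumed by relationship_test(); the field names match
    # the lookups above, while the metric names and values are invented:
    #   {"RuleIndex": 1, "TestType": "RelationshipTest", "Formula": "a+b",
    #    "RangeLower": 0, "RangeUpper": 100, "ErrorThreshold": 5.0,
    #    "Metrics": [{"Name": "metric_a", "Alias": "a"},
    #                {"Name": "metric_b", "Alias": "b"}]}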

    # Single Metric Test
    def single_test(self, rule: dict):
        """
        Validate that the metrics are in the required value range,
        e.g. lower_bound <= metric_value <= upper_bound.
        One metric is counted as one test in this type of test.
        One rule may include one or more metrics.
        Failure: when the metric value is not provided or the value is outside the bounds.
        This test updates self.totalcnt and records failed tests in self.failtests['SingleMetricTest'].

        @param rule: dict with metrics to validate and the value range requirement
        """
        lbv, ubv, t = self.get_bounds(rule['RangeLower'], rule['RangeUpper'], rule['ErrorThreshold'])
        metrics = rule['Metrics']
        passcnt = 0
        totalcnt = 0
        failures = dict()
        rerun = list()
        for m in metrics:
            totalcnt += 1
            result = self.get_value(m['Name'])
            if (len(result) > 0 and self.check_bound(result[0], lbv, ubv, t)) or m['Name'] in self.skiplist:
                passcnt += 1
            else:
                failures[m['Name']] = result
                rerun.append(m['Name'])

        if len(rerun) > 0 and len(rerun) < 20:
            second_results = dict()
            self.second_test(rerun, second_results)
            for name, val in second_results.items():
                if name not in failures:
                    continue
                if self.check_bound(val, lbv, ubv, t):
                    passcnt += 1
                    del failures[name]
                else:
                    failures[name] = val
                    self.results[0][name] = val

        self.totalcnt += totalcnt
        self.passedcnt += passcnt
        self.failtests['SingleMetricTest']['Total Tests'] += totalcnt
        self.failtests['SingleMetricTest']['Passed Tests'] += passcnt
        if len(failures.keys()) != 0:
            faillist = [{'MetricName': name, 'CollectedValue': val} for name, val in failures.items()]
            self.failtests['SingleMetricTest']['Failed Tests'].append({'RuleIndex': rule['RuleIndex'],
                                                                       'RangeLower': rule['RangeLower'],
                                                                       'RangeUpper': rule['RangeUpper'],
                                                                       'ErrorThreshold': rule['ErrorThreshold'],
                                                                       'Failure': faillist})

        return
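
    # Example rule shape consumed by single_test(); same field names as the
    # relationship example, minus Formula and Alias (metric names invented):
    #   {"RuleIndex": 2, "TestType": "SingleMetricTest", "RangeLower": "0",
    #    "RangeUpper": "100", "ErrorThreshold": 5.0,
    #    "Metrics": [{"Name": "metric_x"}, {"Name": "metric_y"}]}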

    def create_report(self):
        """
        Create the final report and write it into a JSON file.
        """
        alldata = list()
        for i in range(0, len(self.workloads)):
            reportstas = {"Total Rule Count": self.alltotalcnt[i], "Passed Rule Count": self.allpassedcnt[i]}
            data = {"Metric Validation Statistics": reportstas, "Tests in Category": self.allfailtests[i],
                    "Errors": self.allerrlist[i]}
            alldata.append({"Workload": self.workloads[i], "Report": data})

        json_str = json.dumps(alldata, indent=4)
        print("Test validation finished. Final report: ")
        print(json_str)

        if self.debug:
            allres = [{"Workload": self.workloads[i], "Results": self.allresults[i]} for i in range(0, len(self.workloads))]
            self.json_dump(allres, self.datafname)
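
    # Sketch of the report structure assembled above (the counts are invented):
    #   [{"Workload": "true",
    #     "Report": {"Metric Validation Statistics":
    #                    {"Total Rule Count": 42, "Passed Rule Count": 40},
    #                "Tests in Category": {...},
    #                "Errors": []}}]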

    def check_rule(self, testtype, metric_list):
        """
        Check whether the rule uses metric(s) that do not exist on the current platform.

        @param metric_list: list of metrics from the rule.
        @return: False when any metric in the rule is missing from the metric file (this rule should be skipped).
                 True when all metrics used in the rule are found in the metric file.
        """
        if testtype == "RelationshipTest":
            for m in metric_list:
                if m['Name'] not in self.metrics:
                    return False
        return True

    # Start of Collector and Converter
    def convert(self, data: list, metricvalues: dict):
        """
        Convert collected metric data from the -j output to a dict of {metric_name: value}.
        """
        for json_string in data:
            try:
                result = json.loads(json_string)
                if "metric-unit" in result and result["metric-unit"] != "(null)" and result["metric-unit"] != "":
                    name = result["metric-unit"].split(" ")[1] if len(result["metric-unit"].split(" ")) > 1 \
                        else result["metric-unit"]
                    metricvalues[name.lower()] = float(result["metric-value"])
            except ValueError:
                continue
        return
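
    # A minimal sketch of one '-j' JSON line, showing only the two fields this
    # converter reads (the values are invented):
    #   {"metric-value" : "0.5", "metric-unit" : "GHz"}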

    def _run_perf(self, metric, workload: str):
        tool = 'perf'
        command = [tool, 'stat', '-j', '-M', f"{metric}", "-a"]
        wl = workload.split()
        command.extend(wl)
        print(" ".join(command))
        cmd = subprocess.run(command, stderr=subprocess.PIPE, encoding='utf-8')
        data = [x + '}' for x in cmd.stderr.split('}\n') if x]
        return data
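
    # For example (metric and workload are hypothetical),
    # _run_perf("metric_a", "sleep 1") runs:
    #   perf stat -j -M metric_a -a sleep 1
    # and returns the JSON objects perf wrote to stderr, one string each.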

    def collect_perf(self, workload: str):
        """
        Collect metric data with "perf stat -M" on the given workload with -a and -j.
        """
        self.results = dict()
        print("Starting perf collection")
        print(f"Long workload: {workload}")
        collectlist = dict()
        if self.collectlist != "":
            collectlist[0] = {x for x in self.collectlist.split(",")}
        else:
            collectlist[0] = set(list(self.metrics))
        # Create metric set for relationship rules
        for rule in self.rules:
            if rule["TestType"] == "RelationshipTest":
                metrics = [m["Name"] for m in rule["Metrics"]]
                if not any(m not in collectlist[0] for m in metrics):
                    collectlist[rule["RuleIndex"]] = [",".join(list(set(metrics)))]

        for idx, metrics in collectlist.items():
            if idx == 0:
                wl = "true"
            else:
                wl = workload
            for metric in metrics:
                data = self._run_perf(metric, wl)
                if idx not in self.results:
                    self.results[idx] = dict()
                self.convert(data, self.results[idx])
        return
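
    # collectlist maps index 0 to the full metric set (run against the trivial
    # workload "true") and each relationship RuleIndex to one comma-joined
    # metric group, so related metrics are collected together in a single perf
    # run against the longer workload.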

    def second_test(self, collectlist, second_results):
        workload = self.workloads[self.wlidx]
        for metric in collectlist:
            data = self._run_perf(metric, workload)
            self.convert(data, second_results)

    # End of Collector and Converter

    # Start of Rule Generator
    def parse_perf_metrics(self):
        """
        Read and parse the perf metric list:
        1) find metrics with '1%' or '100%' as ScaleUnit for the Percent check
        2) create the metric name list
        """
        command = ['perf', 'list', '-j', '--details', 'metrics']
        cmd = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding='utf-8')
        try:
            data = json.loads(cmd.stdout)
            for m in data:
                if 'MetricName' not in m:
                    print("Warning: no metric name")
                    continue
                name = m['MetricName'].lower()
                self.metrics.add(name)
                if 'ScaleUnit' in m and (m['ScaleUnit'] == '1%' or m['ScaleUnit'] == '100%'):
                    self.pctgmetrics.add(name.lower())
        except ValueError as error:
            print(f"Error when parsing metric data: {error}")
            sys.exit()

        return
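
    # A sketch of one entry from 'perf list -j --details metrics', showing only
    # the two fields used here (the metric name is hypothetical):
    #   {"MetricName": "metric_a", "ScaleUnit": "100%"}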

    def remove_unsupported_rules(self, rules):
        new_rules = []
        for rule in rules:
            add_rule = True
            for m in rule["Metrics"]:
                if m["Name"] in self.skiplist or m["Name"] not in self.metrics:
                    add_rule = False
                    break
            if add_rule:
                new_rules.append(rule)
        return new_rules

    def create_rules(self):
        """
        Create the full rule list, which includes:
        1) all the rules from the "relationship_rules" file
        2) a SingleMetricTest rule covering all the 'percent' metrics

        Reindex all the rules to avoid a repeated RuleIndex.
        """
        data = self.read_json(self.rulefname)
        rules = data['RelationshipRules']
        self.skiplist = set([name.lower() for name in data['SkipList']])
        self.rules = self.remove_unsupported_rules(rules)
        pctgrule = {'RuleIndex': 0,
                    'TestType': 'SingleMetricTest',
                    'RangeLower': '0',
                    'RangeUpper': '100',
                    'ErrorThreshold': self.tolerance,
                    'Description': 'Metrics in percent unit have a value within [0, 100]',
                    'Metrics': [{'Name': m.lower()} for m in self.pctgmetrics]}
        self.rules.append(pctgrule)

        # Re-index all rules to avoid repeated RuleIndex
        idx = 1
        for r in self.rules:
            r['RuleIndex'] = idx
            idx += 1

        if self.debug:
            # TODO: need to test and generate the file name correctly
            data = {'RelationshipRules': self.rules,
                    'SupportedMetrics': [{"MetricName": name} for name in self.metrics]}
            self.json_dump(data, self.fullrulefname)

        return
    # End of Rule Generator

    def _storewldata(self, key):
        """
        Store all the data of one workload into the corresponding data structures for all workloads.

        @param key: key to the dictionaries (index of self.workloads).
        """
        self.allresults[key] = self.results
        self.allignoremetrics[key] = self.ignoremetrics
        self.allfailtests[key] = self.failtests
        self.alltotalcnt[key] = self.totalcnt
        self.allpassedcnt[key] = self.passedcnt
        self.allerrlist[key] = self.errlist

    # Initialize data structures before data validation of each workload
    def _init_data(self):
        testtypes = ['PositiveValueTest', 'RelationshipTest', 'SingleMetricTest']
        self.results = dict()
        self.ignoremetrics = set()
        self.errlist = list()
        self.failtests = {k: {'Total Tests': 0, 'Passed Tests': 0, 'Failed Tests': []} for k in testtypes}
        self.totalcnt = 0
        self.passedcnt = 0

    def test(self):
        """
        The real entry point of the test framework.
        This function loads the validation rule JSON file and the standard metric file to create
        rules for testing and the name-map dictionaries.
        It also reads in the result JSON file for testing.

        In the test process, it passes through each rule and launches the correct test function
        based on the 'TestType' field of the rule.

        The final report is written into a JSON file.
        """
        if not self.collectlist:
            self.parse_perf_metrics()
        self.create_rules()
        for i in range(0, len(self.workloads)):
            self.wlidx = i
            self._init_data()
            self.collect_perf(self.workloads[i])
            # Run positive value test
            self.pos_val_test()
            for r in self.rules:
                # skip rules that use metrics not existing on this platform
                testtype = r['TestType']
                if not self.check_rule(testtype, r['Metrics']):
                    continue
                if testtype == 'RelationshipTest':
                    self.relationship_test(r)
                elif testtype == 'SingleMetricTest':
                    self.single_test(r)
                else:
                    print("Unsupported Test Type: ", testtype)
                    self.errlist.append("Unsupported Test Type from rule: " + str(r['RuleIndex']))
            self._storewldata(i)
            print("Workload: ", self.workloads[i])
            print("Total metrics collected: ", self.failtests['PositiveValueTest']['Total Tests'])
            print("Non-negative metric count: ", self.failtests['PositiveValueTest']['Passed Tests'])
            print("Total Test Count: ", self.totalcnt)
            print("Passed Test Count: ", self.passedcnt)

        self.create_report()
        return sum(self.alltotalcnt.values()) != sum(self.allpassedcnt.values())

# End of Class Validator


def main() -> int:
    parser = argparse.ArgumentParser(description="Launch metric value validation")

    parser.add_argument("-rule", help="Base validation rule file", required=True)
    parser.add_argument("-output_dir", help="Path for the validator output files and report file", required=True)
    parser.add_argument("-debug", help="Debug run, save intermediate data to files", action="store_true", default=False)
    parser.add_argument("-wl", help="Workload to run during data collection", default="true")
    parser.add_argument("-m", help="Metric list to validate", default="")
    args = parser.parse_args()
    outpath = Path(args.output_dir)
    reportf = Path.joinpath(outpath, 'perf_report.json')
    fullrule = Path.joinpath(outpath, 'full_rule.json')
    datafile = Path.joinpath(outpath, 'perf_data.json')

    validator = Validator(args.rule, reportf, debug=args.debug,
                          datafname=datafile, fullrulefname=fullrule, workload=args.wl,
                          metrics=args.m)
    ret = validator.test()

    return ret
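
# Example invocation (the script, rule file, and output paths are hypothetical):
#   python3 perf_metric_validation.py -rule rules.json -output_dir /tmp/out \
#           -wl "sleep 10"
# The exit status is nonzero when any validation test failed.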

if __name__ == "__main__":
    sys.exit(main())