Samuel Huang | 775b551 | 2020-03-23 19:12:58 | [diff] [blame] | 1 | #!/usr/bin/env python3 |
Avi Drissman | dfd88085 | 2022-09-15 20:11:09 | [diff] [blame] | 2 | # Copyright 2019 The Chromium Authors |
Andrew Grieve | 0bbc6fa | 2019-07-10 16:14:39 | [diff] [blame] | 3 | # Use of this source code is governed by a BSD-style license that can be |
| 4 | # found in the LICENSE file. |
| 5 | |
| 6 | """Prints the large commits given a .csv file from a telemetry size graph.""" |
| 7 | |
| 8 | import argparse |
| 9 | import re |
| 10 | import subprocess |
| 11 | |
| 12 | |
# Revision ranges for which the perf bot reported invalid results. Deltas that
# land on a revision inside any of these ranges are skipped when printing.
# Each entry is a range object (start inclusive, end exclusive), which
# implements __contains__ so "rev in r" is a fast check.
_BAD_COMMIT_RANGES = [
    # https://crbug.com/1361952
    range(1045024, 1045552),
]
| 18 | |
| 19 | |
def _ReadCsv(path):
  """Parses a two-column .csv file into a list of (revision, size) tuples.

  Lines that do not split into exactly two comma-separated fields are ignored,
  as is any line whose first field is the literal header name "revision".

  Args:
    path: Path of the .csv file to read.

  Returns:
    A list of (int, int) tuples in file order. The second column is parsed as
    a float and then truncated to an int.

  Raises:
    ValueError: If a two-field, non-header line has non-numeric content.
  """
  points = []
  with open(path) as csv_file:
    for raw_line in csv_file:
      fields = raw_line.rstrip().split(',')
      if len(fields) != 2:
        continue
      revision, value = fields
      if revision == 'revision':
        # Header row.
        continue
      # Values may be written as floats (e.g. "1024.0"), so go through float().
      points.append((int(revision), int(float(value))))
  return points
| 29 | |
| 30 | |
def _FindBigDeltas(revs_and_sizes, increase_threshold, decrease_threshold):
  """Filters revs_and_sizes for entries that grow/shrink too much.

  Each point is compared against the point immediately before it in the list.

  Args:
    revs_and_sizes: List of (revision, size) tuples, in graph order.
    increase_threshold: A point is reported when its size exceeds the previous
        point's size by strictly more than this many bytes.
    decrease_threshold: A point is reported when its size is below the previous
        point's size by strictly more than this many bytes.

  Returns:
    A list of (rev, delta, prev_rev) tuples, where delta is the signed size
    change and prev_rev is the revision of the preceding point. Returns an
    empty list when revs_and_sizes is empty.
  """
  # Nothing to compare; avoids an IndexError on the [0] lookup below.
  if not revs_and_sizes:
    return []

  big_jumps = []
  # Seed "previous" with the first point so that it is compared with itself
  # (delta of 0) on the first iteration.
  prev_rev, prev_size = revs_and_sizes[0]
  for rev, size in revs_and_sizes:
    delta = size - prev_size
    if delta > increase_threshold or -delta > decrease_threshold:
      big_jumps.append((rev, delta, prev_rev))
    prev_rev = rev
    prev_size = size
  return big_jumps
| 42 | |
| 43 | |
def _LookupCommitInfo(rev):
  """Looks up git metadata for a commit position.

  Runs "git crrev-parse" to map the revision number to a hash, then parses the
  output of "git log -n1" for that hash.

  Args:
    rev: Revision number; converted with str() before being passed to git.

  Returns:
    A (sha1, author, date, title) tuple of strings. author is the text inside
    the angle brackets of the "Author:" line, date is built from two captured
    pieces of the "Date:" line, and title is the first indented non-blank line
    of the log output with tabs replaced by spaces.

  Raises:
    Exception: If "git crrev-parse" produced no output.
    subprocess.CalledProcessError: If either git command exits non-zero.
    AttributeError: If the log output does not match the expected patterns.
  """
  commit_hash = subprocess.check_output(['git', 'crrev-parse',
                                         str(rev)],
                                        encoding="utf-8").strip()
  if not commit_hash:
    raise Exception(f'git crrev-parse for {rev} failed. Probably need to '
                    f'"git fetch origin main"')

  log_text = subprocess.check_output(['git', 'log', '-n1', commit_hash],
                                     encoding="utf-8")

  author_match = re.search(r'Author: .*?<(.*?)>', log_text)
  author = author_match.group(1)

  # Skips the first word after "Date:", captures "<word> <digits>", lazily
  # skips a middle field, then captures the next run of digits (presumably
  # weekday, "Mon DD", time and year in git's default date format).
  date_match = re.search(r'Date:\s+\w+\s+(\w+ \d+)\s+.*?\s+(\d+)', log_text)
  month_day, year = date_match.groups()
  date = f'{month_day} {year}'

  # Tabs are replaced because callers emit the title in tab-separated rows.
  title_match = re.search(r'\n +(\S.*)', log_text)
  title = title_match.group(1).replace('\t', ' ')

  return commit_hash, author, date, title
Andrew Grieve | 0bbc6fa | 2019-07-10 16:14:39 | [diff] [blame] | 56 | |
| 57 | |
def main():
  """Prints commit info for every large size jump found in the input .csv.

  Parses command-line arguments, reads the (revision, size) points, finds the
  points whose size changed by more than the configured thresholds, and prints
  one tab-separated row per such point. Points whose revision falls inside
  _BAD_COMMIT_RANGES are skipped and counted instead.

  Exits via parser.error() (status 2) when the .csv contains no data points.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--increase-threshold',
      type=int,
      default=30 * 1024,
      help='Minimum number of bytes larger to be considered a notable.')
  parser.add_argument(
      '--decrease-threshold',
      type=int,
      default=30 * 1024,
      help='Minimum number of bytes smaller to be considered a notable.')
  parser.add_argument(
      'points_csv', help='Input .csv file with columns: revision,value')
  options = parser.parse_args()

  revs_and_sizes = _ReadCsv(options.points_csv)
  if not revs_and_sizes:
    # Without this, the range summary below fails with a bare IndexError.
    parser.error(f'No data points found in {options.points_csv}')

  big_deltas = _FindBigDeltas(revs_and_sizes, options.increase_threshold,
                              options.decrease_threshold)

  # "Up to" because entries inside bad commit ranges are skipped below.
  print('Printing info for up to {} commits in the range {}-{}'.format(
      len(big_deltas), revs_and_sizes[0][0], revs_and_sizes[-1][0]))
  # NOTE(review): this header is comma-separated while the rows below are
  # tab-separated; left unchanged in case consumers depend on the exact text.
  print('Revision,Hash,Title,Author,Delta,Date')
  num_bad_commits = 0
  for rev, delta, prev_rev in big_deltas:
    if any(rev in r for r in _BAD_COMMIT_RANGES):
      num_bad_commits += 1
      continue
    sha1, author, date, title = _LookupCommitInfo(rev)
    rev_str = str(rev)
    if rev - prev_rev > 1:
      # The data has a gap, so the jump could come from any commit in it.
      rev_str = f'{prev_rev}..{rev}'
    print('\t'.join([rev_str, sha1, title, author, str(delta), date]))

  if num_bad_commits:
    print(f'Ignored {num_bad_commits} commits from bad ranges')
| 94 | |
Andrew Grieve | 0bbc6fa | 2019-07-10 16:14:39 | [diff] [blame] | 95 | |
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
  main()