tools/android/nullaway/java_file_stats.py - chromium/src - Git at Google

 #!/usr/bin/env python3
 # Copyright 2025 The Chromium Authors
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 """Tool to collect stats about progress of adding @NullMarked."""

 import argparse
 import collections
 import csv
 from datetime import date
 import logging
 import pathlib
 import sys
 import time

 # Directory three levels above the one containing this script.
 _SRC_ROOT = pathlib.Path(__file__).resolve().parents[3]

 # Directories directly under _SRC_ROOT whose .java files are ignored.
 _EXCLUDED_SUBDIRS = ('out', 'third_party')
 # A file is treated as a test when its path contains one of these substrings.
 _TEST_PATH_SUBSTRINGS = ('test', 'Test')

 # Directories directly under _SRC_ROOT that get their own line in the
 # "By Directory" part of the printed report.
 _SUBDIRS_FOR_STATS = [
     'base',
     'chrome',
     'android_webview',
     'components',
     'content',
     'clank',
 ]


 def _is_test(file):
     """Returns whether |file| looks like a test file, judging by its path.

     When |file| lives under _SRC_ROOT only the part of the path below
     _SRC_ROOT is examined, so that a checkout located in a directory whose
     name happens to contain "test" does not turn every file into a test.
     Paths outside _SRC_ROOT are matched against their full absolute path.

     Args:
       file: pathlib.Path of a source file.

     Returns:
       True if any of _TEST_PATH_SUBSTRINGS occurs in the examined path.
     """
     absolute = file.absolute()
     try:
         haystack = str(absolute.relative_to(_SRC_ROOT))
     except ValueError:
         # Not located under _SRC_ROOT; fall back to the whole path.
         haystack = str(absolute)
     return any(s in haystack for s in _TEST_PATH_SUBSTRINGS)


 def _collect_java_files(start_dir):
     """Yields the .java files under |start_dir| that are not excluded.

     Args:
       start_dir: Directory to search recursively (str or pathlib.Path).

     Yields:
       Absolute pathlib.Path objects for files matching **/*.java that are
       not located under _SRC_ROOT/<dir> for any <dir> in _EXCLUDED_SUBDIRS.
     """
     # Resolve the search root once so that every globbed path is absolute.
     # The exclusion test compares against absolute paths built from
     # _SRC_ROOT, which can never match a path relative to the current
     # directory.
     root = pathlib.Path(start_dir).resolve()
     excluded_dirs = [_SRC_ROOT / subdir for subdir in _EXCLUDED_SUBDIRS]
     for file in root.glob('**/*.java'):
         # The glob also matches directories whose name ends in .java.
         if not file.is_file():
             continue
         parents = file.parents
         if any(excluded in parents for excluded in excluded_dirs):
             continue
         yield file


 def _check_if_marked(java_files):
     marked_all = set()
     unmarked_all = set()
     nomark_all = set()
     for path in java_files:
         data = path.read_text()
         marked = '@NullMarked' in data
         unmarked = '@NullUnmarked' in data
         if marked:
             marked_all.add(path)
         elif not unmarked:
             nomark_all.add(path)
         if unmarked:
             unmarked_all.add(path)

     return marked_all, nomark_all, unmarked_all


 def _breadown_stats_by_subdir(files):
     """Counts how many of |files| live under each of _SUBDIRS_FOR_STATS.

     Args:
       files: Iterable of pathlib.Path objects.

     Returns:
       A collections.defaultdict(int) mapping subdir name to file count.
       Subdirs without any file are absent and read as 0.
     """
     subdir_roots = [(name, _SRC_ROOT / name) for name in _SUBDIRS_FOR_STATS]
     counts = collections.defaultdict(int)
     for java_file in files:
         ancestors = java_file.parents
         for name, subdir_root in subdir_roots:
             if subdir_root in ancestors:
                 counts[name] += 1
     return counts


 def _print_stats(marked_all, nomark_all, unmarked_all):
     """Prints overall and per-directory annotation statistics to stdout.

     Each figure is printed as "count/total (percent%)", where total is the
     number of distinct files across all three arguments.

     Args:
       marked_all: Set of files containing @NullMarked.
       nomark_all: Set of files containing neither annotation.
       unmarked_all: Set of files containing @NullUnmarked.
     """
     # Files that contain only @NullUnmarked appear in neither |marked_all|
     # nor |nomark_all| (see _check_if_marked), so the denominator must be
     # the union of all three sets rather than marked + nomark. Otherwise
     # such files vanish from the totals and ratios can exceed 100%.
     all_files = set().union(marked_all, nomark_all, unmarked_all)
     marked_by_subdirs = _breadown_stats_by_subdir(marked_all)
     nomark_by_subdirs = _breadown_stats_by_subdir(nomark_all)
     unmarked_by_subdirs = _breadown_stats_by_subdir(unmarked_all)
     total_by_subdirs = _breadown_stats_by_subdir(all_files)
     total = len(all_files)

     def stat(c, t):
         # '-' stands in for the percentage when there is nothing to divide by.
         pct_string = str(round(c / t * 100)) if t != 0 else '-'
         return f'{c}/{t} ({pct_string}%)'

     print()
     print('Overall:')
     print('  @NullMarked:', stat(len(marked_all), total))
     print('  Neither:', stat(len(nomark_all), total))
     print('  @NullUnmarked:', stat(len(unmarked_all), total))
     print()
     print('By Directory (@NullMarked / Neither / @NullUnmarked):')
     for subdir in _SUBDIRS_FOR_STATS:
         subdir_total = total_by_subdirs[subdir]
         # Skip subdirs that contributed no files (e.g. non-existent ones).
         if subdir_total == 0:
             continue
         print(f'  //{subdir}:', stat(marked_by_subdirs[subdir], subdir_total),
               '/', stat(nomark_by_subdirs[subdir], subdir_total), '/',
               stat(unmarked_by_subdirs[subdir], subdir_total))


 def _read_file_list(filepath):
     with open(filepath, 'rt') as f:
         return (pathlib.Path(java_file.strip()) for java_file in f.readlines())


 def _write_file_list(filepath, filelist):
     sorted_filelist = sorted(filelist)
     with open(filepath, 'wt') as f:
         f.writelines(f'{str(p)}\n' for p in sorted_filelist)


 def _split_out_tests(files):
     """Moves test files out of |files|.

     Args:
       files: Set of pathlib.Path objects; modified in place.

     Returns:
       The set of paths for which _is_test() is true (removed from |files|).
     """
     tests = {f for f in files if _is_test(f)}
     files.difference_update(tests)
     return tests


 def main():
     """Collects Java files, classifies them and reports the statistics.

     Steps: parse flags, obtain the file list (tree walk or
     --cached-file-list), classify each file by annotation, optionally write
     the per-category and full file lists, then report non-test and test
     files separately, either as one CSV row or as a human-readable summary.
     """
     parser = argparse.ArgumentParser(description=__doc__)
     parser.add_argument('-C',
                         dest='src_dir',
                         default=_SRC_ROOT,
                         help='Path to CHROMIUM_SRC.')
     parser.add_argument(
         '--unmarked-list-path',
         help='Path to output the list of files with @NullUnmarked.')
     parser.add_argument(
         '--marked-list-path',
         help='Path to output the list of files with @NullMarked.')
     parser.add_argument(
         '--nomark-list-path',
         help='Path to output the list of files without any annotation.')
     parser.add_argument(
         '--cached-file-list',
         help='Path to list of java files instead of walking the tree.')
     parser.add_argument(
         '--output-file-list',
         help='Path to output list of java files for use by --cached-file-list.'
     )
     parser.add_argument('--csv', action='store_true', help='Output a .csv')
     parser.add_argument('-v', '--verbose', action='store_true')
     options = parser.parse_args(sys.argv[1:])

     logging_level = logging.DEBUG if options.verbose else logging.INFO
     logging.basicConfig(level=logging_level)

     if options.cached_file_list and options.output_file_list:
         parser.error(
             "Can't pass in both --cached-file-list and --output-file-list")

     logging.info('Collecting java files')
     start = time.time()
     if options.cached_file_list:
         source = _read_file_list(options.cached_file_list)
     else:
         source = _collect_java_files(options.src_dir)
     # Materialize so the list can safely be iterated more than once below.
     java_files = list(source)
     logging.info('Collecting java files done in %.1fs', time.time() - start)

     logging.info('Processing files')
     start = time.time()
     marked, nomark, unmarked = _check_if_marked(java_files)
     logging.info('Processing files done in %.1fs', time.time() - start)

     # The lists are written before tests are split out, so they include
     # both test and non-test files.
     if options.unmarked_list_path:
         _write_file_list(options.unmarked_list_path, unmarked)
     if options.marked_list_path:
         _write_file_list(options.marked_list_path, marked)
     if options.nomark_list_path:
         _write_file_list(options.nomark_list_path, nomark)
     if options.output_file_list:
         _write_file_list(options.output_file_list, java_files)

     logging.info('Calculating stats')
     start = time.time()
     marked_tests = _split_out_tests(marked)
     nomark_tests = _split_out_tests(nomark)
     unmarked_tests = _split_out_tests(unmarked)

     if options.csv:
         csv.writer(sys.stdout).writerow(
             (date.today(), len(marked), len(nomark), len(unmarked),
              len(marked_tests), len(nomark_tests), len(unmarked_tests)))
     else:
         print(date.today())
         print('==== Non-test Files ====')
         _print_stats(marked, nomark, unmarked)
         print()
         print('====== Test Files ======')
         _print_stats(marked_tests, nomark_tests, unmarked_tests)
     logging.info('Calculating stats done in %.1fs', time.time() - start)


 # Run only when executed as a script. main() has no return statement, so
 # sys.exit() receives None, which means a zero exit status.
 if __name__ == '__main__':
     sys.exit(main())
	#!/usr/bin/env python3
	# Copyright 2025 The Chromium Authors
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.
	"""Tool to collect stats about progress of adding @NullMarked."""

	import argparse
	import collections
	import csv
	from datetime import date
	import logging
	import pathlib
	import sys
	import time

	# Directory three levels above the one containing this script.
	_SRC_ROOT = pathlib.Path(__file__).resolve().parents[3]

	# Directories directly under _SRC_ROOT whose .java files are ignored.
	_EXCLUDED_SUBDIRS = ('out', 'third_party')
	# A file is treated as a test when its path contains one of these substrings.
	_TEST_PATH_SUBSTRINGS = ('test', 'Test')

	# Directories directly under _SRC_ROOT that get their own line in the
	# "By Directory" part of the printed report.
	_SUBDIRS_FOR_STATS = [
	'base',
	'chrome',
	'android_webview',
	'components',
	'content',
	'clank',
	]


	def _is_test(file):
	    """Returns whether |file| looks like a test file, judging by its path.

	    When |file| lives under _SRC_ROOT only the part of the path below
	    _SRC_ROOT is examined, so that a checkout located in a directory whose
	    name happens to contain "test" does not turn every file into a test.
	    Paths outside _SRC_ROOT are matched against their full absolute path.

	    Args:
	      file: pathlib.Path of a source file.

	    Returns:
	      True if any of _TEST_PATH_SUBSTRINGS occurs in the examined path.
	    """
	    absolute = file.absolute()
	    try:
	        haystack = str(absolute.relative_to(_SRC_ROOT))
	    except ValueError:
	        # Not located under _SRC_ROOT; fall back to the whole path.
	        haystack = str(absolute)
	    return any(s in haystack for s in _TEST_PATH_SUBSTRINGS)


	def _collect_java_files(start_dir):
	    """Yields the .java files under |start_dir| that are not excluded.

	    Args:
	      start_dir: Directory to search recursively (str or pathlib.Path).

	    Yields:
	      Absolute pathlib.Path objects for files matching **/*.java that are
	      not located under _SRC_ROOT/<dir> for any <dir> in _EXCLUDED_SUBDIRS.
	    """
	    # Resolve the search root once so that every globbed path is absolute.
	    # The exclusion test compares against absolute paths built from
	    # _SRC_ROOT, which can never match a path relative to the current
	    # directory.
	    root = pathlib.Path(start_dir).resolve()
	    excluded_dirs = [_SRC_ROOT / subdir for subdir in _EXCLUDED_SUBDIRS]
	    # Recursive pattern: '*/.java' would only match files literally named
	    # ".java" exactly one directory below the root.
	    for file in root.glob('**/*.java'):
	        # The glob also matches directories whose name ends in .java.
	        if not file.is_file():
	            continue
	        parents = file.parents
	        if any(excluded in parents for excluded in excluded_dirs):
	            continue
	        yield file


	def _check_if_marked(java_files):
	marked_all = set()
	unmarked_all = set()
	nomark_all = set()
	for path in java_files:
	data = path.read_text()
	marked = '@NullMarked' in data
	unmarked = '@NullUnmarked' in data
	if marked:
	marked_all.add(path)
	elif not unmarked:
	nomark_all.add(path)
	if unmarked:
	unmarked_all.add(path)

	return marked_all, nomark_all, unmarked_all


	def _breadown_stats_by_subdir(files):
	    """Counts how many of |files| live under each of _SUBDIRS_FOR_STATS.

	    Args:
	      files: Iterable of pathlib.Path objects.

	    Returns:
	      A collections.defaultdict(int) mapping subdir name to file count.
	      Subdirs without any file are absent and read as 0.
	    """
	    subdir_roots = [(name, _SRC_ROOT / name) for name in _SUBDIRS_FOR_STATS]
	    counts = collections.defaultdict(int)
	    for java_file in files:
	        ancestors = java_file.parents
	        for name, subdir_root in subdir_roots:
	            if subdir_root in ancestors:
	                counts[name] += 1
	    return counts


	def _print_stats(marked_all, nomark_all, unmarked_all):
	    """Prints overall and per-directory annotation statistics to stdout.

	    Each figure is printed as "count/total (percent%)", where total is the
	    number of distinct files across all three arguments.

	    Args:
	      marked_all: Set of files containing @NullMarked.
	      nomark_all: Set of files containing neither annotation.
	      unmarked_all: Set of files containing @NullUnmarked.
	    """
	    # Files that contain only @NullUnmarked appear in neither |marked_all|
	    # nor |nomark_all| (see _check_if_marked), so the denominator must be
	    # the union of all three sets rather than marked + nomark. Otherwise
	    # such files vanish from the totals and ratios can exceed 100%.
	    all_files = set().union(marked_all, nomark_all, unmarked_all)
	    marked_by_subdirs = _breadown_stats_by_subdir(marked_all)
	    nomark_by_subdirs = _breadown_stats_by_subdir(nomark_all)
	    unmarked_by_subdirs = _breadown_stats_by_subdir(unmarked_all)
	    total_by_subdirs = _breadown_stats_by_subdir(all_files)
	    total = len(all_files)

	    def stat(c, t):
	        # '-' stands in for the percentage when there is nothing to divide by.
	        pct_string = str(round(c / t * 100)) if t != 0 else '-'
	        return f'{c}/{t} ({pct_string}%)'

	    print()
	    print('Overall:')
	    print('  @NullMarked:', stat(len(marked_all), total))
	    print('  Neither:', stat(len(nomark_all), total))
	    print('  @NullUnmarked:', stat(len(unmarked_all), total))
	    print()
	    print('By Directory (@NullMarked / Neither / @NullUnmarked):')
	    for subdir in _SUBDIRS_FOR_STATS:
	        subdir_total = total_by_subdirs[subdir]
	        # Skip subdirs that contributed no files (e.g. non-existent ones).
	        if subdir_total == 0:
	            continue
	        print(f'  //{subdir}:', stat(marked_by_subdirs[subdir], subdir_total),
	              '/', stat(nomark_by_subdirs[subdir], subdir_total), '/',
	              stat(unmarked_by_subdirs[subdir], subdir_total))


	def _read_file_list(filepath):
	with open(filepath, 'rt') as f:
	return (pathlib.Path(java_file.strip()) for java_file in f.readlines())


	def _write_file_list(filepath, filelist):
	sorted_filelist = sorted(filelist)
	with open(filepath, 'wt') as f:
	f.writelines(f'{str(p)}\n' for p in sorted_filelist)


	def _split_out_tests(files):
	    """Moves test files out of |files|.

	    Args:
	      files: Set of pathlib.Path objects; modified in place.

	    Returns:
	      The set of paths for which _is_test() is true (removed from |files|).
	    """
	    tests = {f for f in files if _is_test(f)}
	    files.difference_update(tests)
	    return tests


	def main():
	    """Collects Java files, classifies them and reports the statistics.

	    Steps: parse flags, obtain the file list (tree walk or
	    --cached-file-list), classify each file by annotation, optionally write
	    the per-category and full file lists, then report non-test and test
	    files separately, either as one CSV row or as a human-readable summary.
	    """
	    parser = argparse.ArgumentParser(description=__doc__)
	    parser.add_argument('-C',
	                        dest='src_dir',
	                        default=_SRC_ROOT,
	                        help='Path to CHROMIUM_SRC.')
	    parser.add_argument(
	        '--unmarked-list-path',
	        help='Path to output the list of files with @NullUnmarked.')
	    parser.add_argument(
	        '--marked-list-path',
	        help='Path to output the list of files with @NullMarked.')
	    parser.add_argument(
	        '--nomark-list-path',
	        help='Path to output the list of files without any annotation.')
	    parser.add_argument(
	        '--cached-file-list',
	        help='Path to list of java files instead of walking the tree.')
	    parser.add_argument(
	        '--output-file-list',
	        help='Path to output list of java files for use by --cached-file-list.'
	    )
	    parser.add_argument('--csv', action='store_true', help='Output a .csv')
	    parser.add_argument('-v', '--verbose', action='store_true')
	    options = parser.parse_args(sys.argv[1:])

	    logging_level = logging.DEBUG if options.verbose else logging.INFO
	    logging.basicConfig(level=logging_level)

	    if options.cached_file_list and options.output_file_list:
	        parser.error(
	            "Can't pass in both --cached-file-list and --output-file-list")

	    logging.info('Collecting java files')
	    start = time.time()
	    if options.cached_file_list:
	        source = _read_file_list(options.cached_file_list)
	    else:
	        source = _collect_java_files(options.src_dir)
	    # Materialize so the list can safely be iterated more than once below.
	    java_files = list(source)
	    logging.info('Collecting java files done in %.1fs', time.time() - start)

	    logging.info('Processing files')
	    start = time.time()
	    marked, nomark, unmarked = _check_if_marked(java_files)
	    logging.info('Processing files done in %.1fs', time.time() - start)

	    # The lists are written before tests are split out, so they include
	    # both test and non-test files.
	    if options.unmarked_list_path:
	        _write_file_list(options.unmarked_list_path, unmarked)
	    if options.marked_list_path:
	        _write_file_list(options.marked_list_path, marked)
	    if options.nomark_list_path:
	        _write_file_list(options.nomark_list_path, nomark)
	    if options.output_file_list:
	        _write_file_list(options.output_file_list, java_files)

	    logging.info('Calculating stats')
	    start = time.time()
	    marked_tests = _split_out_tests(marked)
	    nomark_tests = _split_out_tests(nomark)
	    unmarked_tests = _split_out_tests(unmarked)

	    if options.csv:
	        csv.writer(sys.stdout).writerow(
	            (date.today(), len(marked), len(nomark), len(unmarked),
	             len(marked_tests), len(nomark_tests), len(unmarked_tests)))
	    else:
	        print(date.today())
	        print('==== Non-test Files ====')
	        _print_stats(marked, nomark, unmarked)
	        print()
	        print('====== Test Files ======')
	        _print_stats(marked_tests, nomark_tests, unmarked_tests)
	    logging.info('Calculating stats done in %.1fs', time.time() - start)


	# Run only when executed as a script; sys.exit() is handed whatever main()
	# returns.
	if __name__ == '__main__':
	    sys.exit(main())