-
-
Notifications
You must be signed in to change notification settings - Fork 4.4k
chore(eco): Refactors organization report building #96917
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
chore(eco): Refactors organization report building #96917
Conversation
…zation scheduling
Codecov Report❌ Patch coverage is
Additional details and impacted files@@ Coverage Diff @@
## master #96917 +/- ##
========================================
Coverage 80.68% 80.69%
========================================
Files 8498 8506 +8
Lines 374269 374689 +420
Branches 24290 24290
========================================
+ Hits 301996 302361 +365
- Misses 71896 71951 +55
Partials 377 377 |
with sentry_sdk.start_span(op="weekly_reports.user_project_ownership"): | ||
for project_id, user_id in OrganizationMember.objects.filter( | ||
organization_id=ctx.organization.id, | ||
teams__projectteam__project__isnull=False, | ||
teams__status=TeamStatus.ACTIVE, | ||
).values_list("teams__projectteam__project_id", "user_id"): | ||
if user_id is not None: | ||
ctx.project_ownership.setdefault(user_id, set()).add(project_id) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Each of these chunks is copied pretty much verbatim from the prepare_organization_report
task. It just allows us to individually wrap chunks in SLOs if we decide to go this route.
I could also see a scenario where we run each of these steps on a per-project-context basis and remove all of the nested mutability of passing around a partially populated OrganizationReportContext
object. Future goal though 😅
""" | ||
Returns True if every project context is empty. | ||
""" | ||
return all( | ||
project_ctx.check_if_project_is_empty() | ||
for project_ctx in self.projects_context_map.values() | ||
) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Replaced the util for this with an actual helper method for simplicity.
@dataclass
class WeeklyReportProgressTracker:
    """
    Persists the ID of the last organization processed for one weekly report
    run, so that scheduling can read the watermark back later.

    The watermark lives in Redis under a key made of
    ``REPORT_REDIS_CLIENT_KEY`` and the run's start timestamp. Both the start
    timestamp and the duration may be passed explicitly; when omitted they
    default to the start of the current UTC day and ``ONE_DAY * 7``.

    NOTE(review): ``duration`` is only stored on the instance here; nothing in
    this class applies it as an expiry on the Redis key -- confirm whether a
    TTL is expected.
    """

    beginning_of_day_timestamp: float
    duration: int
    _redis_connection: LocalClient

    # Used both as the Redis key prefix and as the routing key that selects
    # the local client.
    REPORT_REDIS_CLIENT_KEY: Final[str] = "weekly_reports_org_id_min"

    def __init__(self, timestamp: float | None = None, duration: int | None = None):
        """
        :param timestamp: start of the reporting run; when ``None``, the
            current time floored to the UTC day is used (the time that the
            report was generated).
        :param duration: total timespan that the task covers; when ``None``,
            ``ONE_DAY * 7`` is used.
        """
        self.beginning_of_day_timestamp = (
            floor_to_utc_day(timezone.now()).timestamp() if timestamp is None else timestamp
        )
        self.duration = ONE_DAY * 7 if duration is None else duration

        default_cluster = redis.clusters.get("default")
        self._redis_connection = default_cluster.get_local_client_for_key(
            self.REPORT_REDIS_CLIENT_KEY
        )

    @property
    def min_org_id_redis_key(self) -> str:
        """Redis key for this run's watermark: ``<prefix>:<start timestamp>``."""
        return f"{self.REPORT_REDIS_CLIENT_KEY}:{self.beginning_of_day_timestamp}"

    def get_last_processed_org_id(self) -> int | None:
        """Return the stored organization ID as an int, or ``None`` if no value is stored."""
        stored_value = self._redis_connection.get(self.min_org_id_redis_key)
        if not stored_value:
            return None
        return int(stored_value)

    def set_last_processed_org_id(self, org_id: int) -> None:
        """Store ``org_id`` as the watermark for this run."""
        self._redis_connection.set(self.min_org_id_redis_key, org_id)

    def delete_min_org_id(self) -> None:
        """Remove this run's watermark key from Redis."""
        self._redis_connection.delete(self.min_org_id_redis_key)
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is part of the parent PR, ignore pls
if timestamp is None: | ||
# The time that the report was generated | ||
timestamp = floor_to_utc_day(timezone.now()).timestamp() | ||
|
||
if duration is None: | ||
# The total timespan that the task covers | ||
duration = ONE_DAY * 7 | ||
|
||
batch_id = str(uuid.uuid4()) | ||
|
||
def min_org_id_redis_key(timestamp: float) -> str: | ||
return f"weekly_reports_org_id_min:{timestamp}" | ||
|
||
redis_cluster = redis.clusters.get("default").get_local_client_for_key( | ||
"weekly_reports_org_id_min" | ||
) | ||
|
||
min_org_id_from_redis = redis_cluster.get(min_org_id_redis_key(timestamp)) | ||
minimum_organization_id = int(min_org_id_from_redis) if min_org_id_from_redis else None | ||
batching = WeeklyReportProgressTracker(timestamp, duration) | ||
minimum_organization_id = batching.get_last_processed_org_id() | ||
|
||
organizations = Organization.objects.filter(status=OrganizationStatus.ACTIVE) | ||
|
||
for organization in RangeQuerySetWrapper( | ||
organizations, | ||
step=10000, | ||
result_value_getter=lambda item: item.id, | ||
min_id=minimum_organization_id, | ||
): | ||
# Create a celery task per organization | ||
logger.info( | ||
"weekly_reports.schedule_organizations", | ||
extra={ | ||
"batch_id": str(batch_id), | ||
"organization": organization.id, | ||
"minimum_organization_id": minimum_organization_id, | ||
}, | ||
) | ||
prepare_organization_report.delay( | ||
timestamp, duration, organization.id, batch_id, dry_run=dry_run | ||
) | ||
redis_cluster.set(min_org_id_redis_key(timestamp), organization.id) | ||
with WeeklyReportSLO( | ||
operation_type=WeeklyReportOperationType.SCHEDULE_ORGANIZATION_REPORTS | ||
).capture() as lifecycle: | ||
try: | ||
batch_id = str(uuid.uuid4()) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Same with these changes.
685b49f
to
e54c100
Compare
Continuation of #96869, merge that first
Includes the following refactors:
- `OrganizationReportContextFactory` to clean up the `prepare_organization_report` task.
- `is_empty` check logic to `OrganizationReportContext` class, from the dedicated util where it lived before.
- `ProjectContext` checks when building org contexts.

Why these changes?
This is a precursor for adding metrics, SLOs, and logging for organization reporting logic.
This also will precede SLOs for individual email sending, which will come in a later PR as well.