diff --git a/taiga/timeline/management/commands/rebuild_timeline.py b/taiga/timeline/management/commands/rebuild_timeline.py
index f5ba3138..7747eae6 100644
--- a/taiga/timeline/management/commands/rebuild_timeline.py
+++ b/taiga/timeline/management/commands/rebuild_timeline.py
@@ -17,6 +17,7 @@
 from django.core.exceptions import ObjectDoesNotExist
 from django.core.management.base import BaseCommand
 from django.db.models import Model
+from django.db import reset_queries
 
 from taiga.projects.models import Project
 from taiga.projects.history import services as history_services
@@ -31,57 +32,82 @@ from taiga.users.models import User
 from unittest.mock import patch
 from django.contrib.contenttypes.models import ContentType
 
-timeline_objects = []
-created = None
+import gc
+
+class BulkCreator(object):
+    def __init__(self):
+        self.timeline_objects = []
+        self.created = None
+
+    def createElement(self, element):
+        self.timeline_objects.append(element)
+        if len(self.timeline_objects) > 1000:
+            Timeline.objects.bulk_create(self.timeline_objects, batch_size=1000)
+            del self.timeline_objects
+            self.timeline_objects = []
+            gc.collect()
+
+bulk_creator = BulkCreator()
+
+
+def queryset_iterator(queryset, chunksize=1000):
+    '''''
+    Iterate over a Django Queryset ordered by the primary key
+
+    This method loads a maximum of chunksize (default: 1000) rows in it's
+    memory at the same time while django normally would load all rows in it's
+    memory. Using the iterator() method only causes it to not preload all the
+    classes.
+
+    Note that the implementation of the iterator does not support ordered query sets. 
+    '''
+    queryset = queryset.order_by('pk')
+    pk = queryset[0].pk
+    last_pk = queryset.order_by('-pk')[0].pk
+    while pk < last_pk:
+        for row in queryset.filter(pk__gt=pk)[:chunksize]:
+            pk = row.pk
+            yield row
+        gc.collect()
 
 
 def custom_add_to_object_timeline(obj:object, instance:object, event_type:str, namespace:str="default", extra_data:dict={}):
-    global created
-    global timeline_objects
     assert isinstance(obj, Model), "obj must be a instance of Model"
     assert isinstance(instance, Model), "instance must be a instance of Model"
     event_type_key = _get_impl_key_from_model(instance.__class__, event_type)
     impl = _timeline_impl_map.get(event_type_key, None)
-    timeline_objects.append(Timeline(
+    bulk_creator.createElement(Timeline(
         content_object=obj,
         namespace=namespace,
         event_type=event_type_key,
         project=instance.project,
         data=impl(instance, extra_data=extra_data),
         data_content_type = ContentType.objects.get_for_model(instance.__class__),
-        created = created,
+        created = bulk_creator.created,
     ))
 
-def bulk_create():
-    global timeline_objects
-    if len(timeline_objects) > 10000:
-        Timeline.objects.bulk_create(timeline_objects, batch_size=10000)
-        timeline_objects = []
 
 def generate_timeline():
-    global created
     with patch('taiga.timeline.service._add_to_object_timeline', new=custom_add_to_object_timeline):
         # Projects api wasn't a HistoryResourceMixin so we can't interate on the HistoryEntries in this case
-        for project in Project.objects.order_by("created_date").iterator():
-            created = project.created_date
-            print("Project:", created)
+        for project in queryset_iterator(Project.objects.order_by("created_date")):
+            bulk_creator.created = project.created_date
+            print("Project:", bulk_creator.created)
             extra_data = {
                 "values_diff": {},
                 "user": extract_user_info(project.owner),
             }
             _push_to_timelines(project, project.owner, project, "create", extra_data=extra_data)
-            bulk_create()
+            del extra_data
 
-        for historyEntry in HistoryEntry.objects.order_by("created_at").iterator():
+        for historyEntry in queryset_iterator(HistoryEntry.objects.order_by("created_at")):
             print("History entry:", historyEntry.created_at)
             try:
-                created = historyEntry.created_at
+                bulk_creator.created = historyEntry.created_at
                 on_new_history_entry(None, historyEntry, None)
             except ObjectDoesNotExist as e:
                 print("Ignoring")
-            bulk_create()
-
 
 class Command(BaseCommand):
     def handle(self, *args, **options):