Improving rebuild timeline command

remotes/origin/enhancement/email-actions
Alejandro Alonso 2015-04-30 07:42:53 +02:00
parent 33a49cecff
commit a1870ab5b1
1 changed file with 46 additions and 20 deletions

@@ -17,6 +17,7 @@
 from django.core.exceptions import ObjectDoesNotExist
 from django.core.management.base import BaseCommand
 from django.db.models import Model
+from django.db import reset_queries
 from taiga.projects.models import Project
 from taiga.projects.history import services as history_services
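
The reset_queries import added in this hunk is not exercised in the lines shown here, but it points at the usual memory concern for long-running management commands: with settings.DEBUG enabled, Django appends every executed statement to connection.queries, which grows without bound. A minimal sketch of how reset_queries() is typically called from such a loop; process_row is a hypothetical placeholder, not part of this commit:

from django.db import reset_queries

def process_all(rows):
    for i, row in enumerate(rows, start=1):
        process_row(row)  # hypothetical per-row work
        if i % 1000 == 0:
            # Drop the per-connection query log so it cannot grow without
            # bound while settings.DEBUG is True.
            reset_queries()
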
@@ -31,57 +32,82 @@ from taiga.users.models import User
 from unittest.mock import patch
 from django.contrib.contenttypes.models import ContentType
-timeline_objects = []
-created = None
+import gc
+
+
+class BulkCreator(object):
+    def __init__(self):
+        self.timeline_objects = []
+        self.created = None
+
+    def createElement(self, element):
+        self.timeline_objects.append(element)
+        if len(self.timeline_objects) > 1000:
+            Timeline.objects.bulk_create(self.timeline_objects, batch_size=1000)
+            del self.timeline_objects
+            self.timeline_objects = []
+            gc.collect()
+
+bulk_creator = BulkCreator()
+
+
+def queryset_iterator(queryset, chunksize=1000):
+    '''
+    Iterate over a Django QuerySet ordered by the primary key.
+
+    This method loads a maximum of chunksize (default: 1000) rows in its
+    memory at the same time, while Django normally would load all rows in
+    its memory. Using the iterator() method only causes it to not preload
+    all the classes.
+
+    Note that the implementation of the iterator does not support ordered
+    query sets.
+    '''
+    queryset = queryset.order_by('pk')
+    pk = queryset[0].pk
+    last_pk = queryset.order_by('-pk')[0].pk
+    while pk < last_pk:
+        for row in queryset.filter(pk__gt=pk)[:chunksize]:
+            pk = row.pk
+            yield row
+        gc.collect()
 def custom_add_to_object_timeline(obj:object, instance:object, event_type:str, namespace:str="default", extra_data:dict={}):
-    global created
-    global timeline_objects
     assert isinstance(obj, Model), "obj must be a instance of Model"
     assert isinstance(instance, Model), "instance must be a instance of Model"
     event_type_key = _get_impl_key_from_model(instance.__class__, event_type)
     impl = _timeline_impl_map.get(event_type_key, None)
-    timeline_objects.append(Timeline(
+    bulk_creator.createElement(Timeline(
         content_object=obj,
         namespace=namespace,
         event_type=event_type_key,
         project=instance.project,
         data=impl(instance, extra_data=extra_data),
         data_content_type = ContentType.objects.get_for_model(instance.__class__),
-        created = created,
+        created = bulk_creator.created,
     ))
-
-
-def bulk_create():
-    global timeline_objects
-    if len(timeline_objects) > 10000:
-        Timeline.objects.bulk_create(timeline_objects, batch_size=10000)
-        timeline_objects = []
 def generate_timeline():
-    global created
     with patch('taiga.timeline.service._add_to_object_timeline', new=custom_add_to_object_timeline):
         # Projects api wasn't a HistoryResourceMixin so we can't iterate on the HistoryEntries in this case
-        for project in Project.objects.order_by("created_date").iterator():
-            created = project.created_date
-            print("Project:", created)
+        for project in queryset_iterator(Project.objects.order_by("created_date")):
+            bulk_creator.created = project.created_date
+            print("Project:", bulk_creator.created)
             extra_data = {
                 "values_diff": {},
                 "user": extract_user_info(project.owner),
             }
             _push_to_timelines(project, project.owner, project, "create", extra_data=extra_data)
-            bulk_create()
+            del extra_data
-        for historyEntry in HistoryEntry.objects.order_by("created_at").iterator():
+        for historyEntry in queryset_iterator(HistoryEntry.objects.order_by("created_at")):
             print("History entry:", historyEntry.created_at)
             try:
-                created = historyEntry.created_at
+                bulk_creator.created = historyEntry.created_at
                 on_new_history_entry(None, historyEntry, None)
             except ObjectDoesNotExist as e:
                 print("Ignoring")
-            bulk_create()
 class Command(BaseCommand):
     def handle(self, *args, **options):
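
A note on the batching pattern introduced above: BulkCreator.createElement only calls bulk_create once more than 1000 objects have accumulated, and nothing in the hunks shown flushes the remainder below that threshold once the loops finish (the removed bulk_create() had the same property at its 10000 threshold); unless the tail is handled elsewhere, for example in Command.handle, which is not shown here, those rows are never written. A minimal sketch of the same idea with an explicit final flush; the class and method names are illustrative, not the committed API:

import gc

class BatchingCreator:
    """Accumulate model instances and insert them in fixed-size batches."""

    def __init__(self, model, batch_size=1000):
        self.model = model
        self.batch_size = batch_size
        self.pending = []
        self.created = None  # timestamp the caller copies onto each buffered row

    def add(self, obj):
        self.pending.append(obj)
        if len(self.pending) >= self.batch_size:
            self.flush()

    def flush(self):
        # Called whenever the buffer fills up, and once more after the last
        # loop, so rows left over below the threshold are still written.
        if self.pending:
            self.model.objects.bulk_create(self.pending, batch_size=self.batch_size)
            self.pending = []
            gc.collect()

With this shape, generate_timeline would simply call flush() once after both loops. Two smaller observations on the iteration side: queryset_iterator re-orders its input by pk, so the order_by("created_date") and order_by("created_at") at the call sites are effectively discarded; and because it seeds pk with the first row's primary key and then filters with pk__gt, the first row of each queryset is never yielded.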