Improving rebuild timeline command

remotes/origin/enhancement/email-actions
Alejandro Alonso 2015-04-30 07:42:53 +02:00
parent 33a49cecff
commit a1870ab5b1
1 changed file with 46 additions and 20 deletions

View File

@ -17,6 +17,7 @@
from django.core.exceptions import ObjectDoesNotExist
from django.core.management.base import BaseCommand
from django.db.models import Model
from django.db import reset_queries
from taiga.projects.models import Project
from taiga.projects.history import services as history_services
@ -31,57 +32,82 @@ from taiga.users.models import User
from unittest.mock import patch
from django.contrib.contenttypes.models import ContentType
timeline_objects = []
created = None
import gc
class BulkCreator(object):
def __init__(self):
self.timeline_objects = []
self.created = None
def createElement(self, element):
self.timeline_objects.append(element)
if len(self.timeline_objects) > 1000:
Timeline.objects.bulk_create(self.timeline_objects, batch_size=1000)
del self.timeline_objects
self.timeline_objects = []
gc.collect()
bulk_creator = BulkCreator()
def queryset_iterator(queryset, chunksize=1000):
'''''
Iterate over a Django Queryset ordered by the primary key
This method loads a maximum of chunksize (default: 1000) rows in it's
memory at the same time while django normally would load all rows in it's
memory. Using the iterator() method only causes it to not preload all the
classes.
Note that the implementation of the iterator does not support ordered query sets.
'''
queryset = queryset.order_by('pk')
pk = queryset[0].pk
last_pk = queryset.order_by('-pk')[0].pk
while pk < last_pk:
for row in queryset.filter(pk__gt=pk)[:chunksize]:
pk = row.pk
yield row
gc.collect()
def custom_add_to_object_timeline(obj:object, instance:object, event_type:str, namespace:str="default", extra_data:dict={}):
global created
global timeline_objects
assert isinstance(obj, Model), "obj must be a instance of Model"
assert isinstance(instance, Model), "instance must be a instance of Model"
event_type_key = _get_impl_key_from_model(instance.__class__, event_type)
impl = _timeline_impl_map.get(event_type_key, None)
timeline_objects.append(Timeline(
bulk_creator.createElement(Timeline(
content_object=obj,
namespace=namespace,
event_type=event_type_key,
project=instance.project,
data=impl(instance, extra_data=extra_data),
data_content_type = ContentType.objects.get_for_model(instance.__class__),
created = created,
created = bulk_creator.created,
))
def bulk_create():
global timeline_objects
if len(timeline_objects) > 10000:
Timeline.objects.bulk_create(timeline_objects, batch_size=10000)
timeline_objects = []
def generate_timeline():
global created
with patch('taiga.timeline.service._add_to_object_timeline', new=custom_add_to_object_timeline):
# Projects api wasn't a HistoryResourceMixin so we can't interate on the HistoryEntries in this case
for project in Project.objects.order_by("created_date").iterator():
created = project.created_date
print("Project:", created)
for project in queryset_iterator(Project.objects.order_by("created_date")):
bulk_creator.created = project.created_date
print("Project:", bulk_creator.created)
extra_data = {
"values_diff": {},
"user": extract_user_info(project.owner),
}
_push_to_timelines(project, project.owner, project, "create", extra_data=extra_data)
bulk_create()
del extra_data
for historyEntry in HistoryEntry.objects.order_by("created_at").iterator():
for historyEntry in queryset_iterator(HistoryEntry.objects.order_by("created_at")):
print("History entry:", historyEntry.created_at)
try:
created = historyEntry.created_at
bulk_creator.created = historyEntry.created_at
on_new_history_entry(None, historyEntry, None)
except ObjectDoesNotExist as e:
print("Ignoring")
bulk_create()
class Command(BaseCommand):
def handle(self, *args, **options):