Improving rebuild timeline command

parent 33a49cecff
commit a1870ab5b1
@@ -17,6 +17,7 @@
 from django.core.exceptions import ObjectDoesNotExist
 from django.core.management.base import BaseCommand
 from django.db.models import Model
+from django.db import reset_queries
 from taiga.projects.models import Project
 from taiga.projects.history import services as history_services
@@ -31,57 +32,82 @@ from taiga.users.models import User
 from unittest.mock import patch
 from django.contrib.contenttypes.models import ContentType

-timeline_objects = []
-created = None
+import gc
+
+
+class BulkCreator(object):
+    def __init__(self):
+        self.timeline_objects = []
+        self.created = None
+
+    def createElement(self, element):
+        self.timeline_objects.append(element)
+        if len(self.timeline_objects) > 1000:
+            Timeline.objects.bulk_create(self.timeline_objects, batch_size=1000)
+            del self.timeline_objects
+            self.timeline_objects = []
+            gc.collect()
+
+
+bulk_creator = BulkCreator()
+
+
+def queryset_iterator(queryset, chunksize=1000):
+    '''
+    Iterate over a Django Queryset ordered by the primary key.
+
+    This method loads a maximum of chunksize (default: 1000) rows into
+    memory at a time, while Django normally would load all rows into
+    memory. Using the iterator() method only causes it to not preload
+    all the objects.
+
+    Note that the implementation of the iterator does not support ordered
+    query sets.
+    '''
+    queryset = queryset.order_by('pk')
+    pk = queryset[0].pk
+    last_pk = queryset.order_by('-pk')[0].pk
+    while pk < last_pk:
+        for row in queryset.filter(pk__gt=pk)[:chunksize]:
+            pk = row.pk
+            yield row
+        gc.collect()
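
The additions above combine two memory-bounding techniques: buffer Timeline rows and persist them with bulk_create in fixed-size batches, and walk large tables in primary-key chunks instead of materialising whole querysets. Below is a minimal, framework-free sketch of the buffering half of the pattern (not Taiga code: the BatchBuffer name, the flush callable and the toy batch size are stand-ins for BulkCreator and Timeline.objects.bulk_create(..., batch_size=1000)):

    # Sketch only: a generic batch buffer illustrating the BulkCreator pattern.
    class BatchBuffer:
        def __init__(self, flush, batch_size=1000):
            self.flush = flush              # callable that persists a list of items
            self.batch_size = batch_size
            self.items = []

        def add(self, item):
            self.items.append(item)
            if len(self.items) >= self.batch_size:
                self.flush(self.items)      # write one full batch
                self.items = []             # drop references so they can be garbage collected

        def close(self):
            if self.items:                  # write any final partial batch
                self.flush(self.items)
                self.items = []

    # Toy usage with a tiny batch size:
    buf = BatchBuffer(flush=lambda items: print("flushed", len(items), "items"), batch_size=3)
    for i in range(7):
        buf.add(i)
    buf.close()   # flushed 3 items / flushed 3 items / flushed 1 items

The close() step is only illustrative; whether and where the command flushes a final partial batch is outside the lines shown in this diff.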

 def custom_add_to_object_timeline(obj:object, instance:object, event_type:str, namespace:str="default", extra_data:dict={}):
-    global created
-    global timeline_objects
     assert isinstance(obj, Model), "obj must be a instance of Model"
     assert isinstance(instance, Model), "instance must be a instance of Model"
     event_type_key = _get_impl_key_from_model(instance.__class__, event_type)
     impl = _timeline_impl_map.get(event_type_key, None)

-    timeline_objects.append(Timeline(
+    bulk_creator.createElement(Timeline(
         content_object=obj,
         namespace=namespace,
         event_type=event_type_key,
         project=instance.project,
         data=impl(instance, extra_data=extra_data),
         data_content_type = ContentType.objects.get_for_model(instance.__class__),
-        created = created,
+        created = bulk_creator.created,
     ))

-def bulk_create():
-    global timeline_objects
-    if len(timeline_objects) > 10000:
-        Timeline.objects.bulk_create(timeline_objects, batch_size=10000)
-        timeline_objects = []


 def generate_timeline():
-    global created
     with patch('taiga.timeline.service._add_to_object_timeline', new=custom_add_to_object_timeline):
         # Projects api wasn't a HistoryResourceMixin so we can't interate on the HistoryEntries in this case
-        for project in Project.objects.order_by("created_date").iterator():
-            created = project.created_date
-            print("Project:", created)
+        for project in queryset_iterator(Project.objects.order_by("created_date")):
+            bulk_creator.created = project.created_date
+            print("Project:", bulk_creator.created)
             extra_data = {
                 "values_diff": {},
                 "user": extract_user_info(project.owner),
             }
             _push_to_timelines(project, project.owner, project, "create", extra_data=extra_data)
-            bulk_create()
+            del extra_data

-        for historyEntry in HistoryEntry.objects.order_by("created_at").iterator():
+        for historyEntry in queryset_iterator(HistoryEntry.objects.order_by("created_at")):
             print("History entry:", historyEntry.created_at)
             try:
-                created = historyEntry.created_at
+                bulk_creator.created = historyEntry.created_at
                 on_new_history_entry(None, historyEntry, None)
             except ObjectDoesNotExist as e:
                 print("Ignoring")

-        bulk_create()


 class Command(BaseCommand):
     def handle(self, *args, **options):
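
generate_timeline routes every timeline write through the buffering custom_add_to_object_timeline for the duration of the rebuild by temporarily replacing taiga.timeline.service._add_to_object_timeline with unittest.mock.patch. A standalone illustration of that mechanism, patching json.dumps purely as an example target:

    from unittest.mock import patch
    import json

    def fake_dumps(obj, **kwargs):
        # Stand-in replacement, active only inside the with-block.
        return "patched:" + repr(obj)

    with patch("json.dumps", new=fake_dumps):
        print(json.dumps({"a": 1}))    # -> patched:{'a': 1}

    print(json.dumps({"a": 1}))        # original behaviour restored -> {"a": 1}

Because patch restores the original attribute when the with-block exits, the original _add_to_object_timeline comes back automatically once generate_timeline finishes.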