Improving memory usage on project export

remotes/origin/logger
Alejandro Alonso 2015-09-18 11:45:10 +02:00 committed by David Barragán Merino
parent 76bb88b521
commit cde9b77c0d
12 changed files with 151 additions and 26 deletions

View File

@ -17,7 +17,8 @@
- Add endpoints to show the watchers list for issues, tasks and user stories. - Add endpoints to show the watchers list for issues, tasks and user stories.
- Add headers to allow threading for notification emails about changes to issues, tasks, user stories, and wiki pages. (thanks to [@brett](https://github.com/brettp)). - Add headers to allow threading for notification emails about changes to issues, tasks, user stories, and wiki pages. (thanks to [@brett](https://github.com/brettp)).
- Add external apps: now Taiga can integrate with hundreds of applications and services. - Add external apps: now Taiga can integrate with hundreds of applications and services.
- Improving searching system, now full text searches are supported - Improve searching system, now full text searches are supported
- Improve export system, it is now more efficient and prevents possible crashes with heavy projects.
- i18n. - i18n.
- Add italian (it) translation. - Add italian (it) translation.
- Add polish (pl) translation. - Add polish (pl) translation.

View File

@ -117,7 +117,7 @@ DEFAULTS = {
"DATE_INPUT_FORMATS": ( "DATE_INPUT_FORMATS": (
ISO_8601, ISO_8601,
), ),
"DATE_FORMAT": None, "DATE_FORMAT": ISO_8601,
"DATETIME_INPUT_FORMATS": ( "DATETIME_INPUT_FORMATS": (
ISO_8601, ISO_8601,

View File

@ -18,7 +18,7 @@ from django.conf import settings
from django.core.files import storage from django.core.files import storage
import django_sites as sites import django_sites as sites
import os
class FileSystemStorage(storage.FileSystemStorage): class FileSystemStorage(storage.FileSystemStorage):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
@ -30,3 +30,33 @@ class FileSystemStorage(storage.FileSystemStorage):
scheme = site.scheme and "{0}:".format(site.scheme) or "" scheme = site.scheme and "{0}:".format(site.scheme) or ""
self.base_url = url_tmpl.format(scheme=scheme, domain=site.domain, self.base_url = url_tmpl.format(scheme=scheme, domain=site.domain,
url=settings.MEDIA_URL) url=settings.MEDIA_URL)
def open(self, name, mode='rb'):
    """
    Open ``name``, creating the needed directory structure first.

    The parent directory of ``name`` (as returned by ``os.path.dirname``)
    is created when it does not exist yet, honouring
    ``self.directory_permissions_mode`` when it is set. The actual opening
    is then delegated to the parent class.

    Raises ``IOError`` when the parent path exists but is not a directory.
    Any ``OSError`` raised while creating the directories, other than
    "already exists", is propagated unchanged.
    """
    directory = os.path.dirname(name)

    # A bare file name has no parent directory to create; os.makedirs("")
    # would fail, so only touch the filesystem when there is a directory part.
    if directory and not os.path.exists(directory):
        # Note that there is a race between os.path.exists and os.makedirs:
        # if os.makedirs fails because the directory already exists, it was
        # created concurrently, and we can continue normally. Refs #16082.
        try:
            if self.directory_permissions_mode is not None:
                # os.makedirs applies the global umask, so we reset it,
                # for consistency with file_permissions_mode behavior.
                old_umask = os.umask(0)
                try:
                    os.makedirs(directory, self.directory_permissions_mode)
                finally:
                    os.umask(old_umask)
            else:
                os.makedirs(directory)
        except FileExistsError:
            # FileExistsError is the OSError subclass for EEXIST; catching it
            # directly avoids depending on the ``errno`` module, which this
            # method previously referenced without it being imported.
            pass

    if directory and not os.path.isdir(directory):
        raise IOError("%s exists and is not a directory." % directory)

    return super().open(name, mode=mode)

View File

@ -30,6 +30,8 @@ def loads(data):
data = force_text(data) data = force_text(data)
return json.loads(data) return json.loads(data)
load = json.load
# Some backward compatibility that should # Some backward compatibility that should
# be removed in near future. # be removed in near future.
to_json = dumps to_json = dumps

View File

@ -14,7 +14,6 @@
# You should have received a copy of the GNU Affero General Public License # You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
import json
import codecs import codecs
import uuid import uuid
@ -26,6 +25,7 @@ from django.conf import settings
from django.core.files.storage import default_storage from django.core.files.storage import default_storage
from django.core.files.base import ContentFile from django.core.files.base import ContentFile
from taiga.base.utils import json
from taiga.base.decorators import detail_route, list_route from taiga.base.decorators import detail_route, list_route
from taiga.base import exceptions as exc from taiga.base import exceptions as exc
from taiga.base import response from taiga.base import response
@ -67,10 +67,10 @@ class ProjectExporterViewSet(mixins.ImportThrottlingPolicyMixin, GenericViewSet)
return response.Accepted({"export_id": task.id}) return response.Accepted({"export_id": task.id})
path = "exports/{}/{}-{}.json".format(project.pk, project.slug, uuid.uuid4().hex) path = "exports/{}/{}-{}.json".format(project.pk, project.slug, uuid.uuid4().hex)
content = ContentFile(ExportRenderer().render(service.project_to_dict(project), storage_path = default_storage.path(path)
renderer_context={"indent": 4}).decode('utf-8')) with default_storage.open(storage_path, mode="w") as outfile:
service.render_project(project, outfile)
default_storage.save(path, content)
response_data = { response_data = {
"url": default_storage.url(path) "url": default_storage.url(path)
} }

View File

@ -18,7 +18,10 @@ from django.core.management.base import BaseCommand, CommandError
from taiga.projects.models import Project from taiga.projects.models import Project
from taiga.export_import.renderers import ExportRenderer from taiga.export_import.renderers import ExportRenderer
from taiga.export_import.service import project_to_dict from taiga.export_import.service import render_project
import resource
class Command(BaseCommand): class Command(BaseCommand):
@ -34,6 +37,5 @@ class Command(BaseCommand):
except Project.DoesNotExist: except Project.DoesNotExist:
raise CommandError('Project "%s" does not exist' % project_slug) raise CommandError('Project "%s" does not exist' % project_slug)
data = project_to_dict(project)
with open('%s.json'%(project_slug), 'w') as outfile: with open('%s.json'%(project_slug), 'w') as outfile:
self.renderer.render_to_file(data, outfile, renderer_context=self.renderer_context) render_project(project, outfile)

View File

@ -19,8 +19,7 @@ from django.db import transaction
from django.db.models import signals from django.db.models import signals
from optparse import make_option from optparse import make_option
import json from taiga.base.utils import json
from taiga.projects.models import Project from taiga.projects.models import Project
from taiga.export_import.renderers import ExportRenderer from taiga.export_import.renderers import ExportRenderer
from taiga.export_import.dump_service import dict_to_project, TaigaImportError from taiga.export_import.dump_service import dict_to_project, TaigaImportError

View File

@ -494,6 +494,8 @@ class RolePointsExportSerializer(serializers.ModelSerializer):
class MilestoneExportSerializer(WatcheableObjectModelSerializer): class MilestoneExportSerializer(WatcheableObjectModelSerializer):
owner = UserRelatedField(required=False) owner = UserRelatedField(required=False)
modified_date = serializers.DateTimeField(required=False) modified_date = serializers.DateTimeField(required=False)
estimated_start = serializers.DateField(required=False)
estimated_finish = serializers.DateField(required=False)
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
project = kwargs.pop('project', None) project = kwargs.pop('project', None)

View File

@ -14,20 +14,28 @@
# You should have received a copy of the GNU Affero General Public License # You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
import uuid import base64
import gc
import resource
import os
import os.path as path import os.path as path
import uuid
from unidecode import unidecode from unidecode import unidecode
from django.template.defaultfilters import slugify from django.template.defaultfilters import slugify
from django.contrib.contenttypes.models import ContentType from django.contrib.contenttypes.models import ContentType
from django.core.exceptions import ObjectDoesNotExist from django.core.exceptions import ObjectDoesNotExist
from django.core.files.storage import default_storage
from taiga.base.utils import json
from taiga.projects.history.services import make_key_from_model_object, take_snapshot from taiga.projects.history.services import make_key_from_model_object, take_snapshot
from taiga.timeline.service import build_project_namespace from taiga.timeline.service import build_project_namespace
from taiga.projects.references import sequences as seq from taiga.projects.references import sequences as seq
from taiga.projects.references import models as refs from taiga.projects.references import models as refs
from taiga.projects.userstories.models import RolePoints from taiga.projects.userstories.models import RolePoints
from taiga.projects.services import find_invited_user from taiga.projects.services import find_invited_user
from taiga.base.api.fields import get_component
from . import serializers from . import serializers
@ -48,8 +56,81 @@ def add_errors(section, errors):
_errors_log[section] = [errors] _errors_log[section] = [errors]
def _write_base64(source, outfile, chunk_size):
    """Stream the bytes read from ``source`` into ``outfile`` as one base64 string."""
    leftover = b""
    while True:
        chunk = source.read(chunk_size)
        if not chunk:
            break

        chunk = leftover + chunk
        # base64 turns every 3 input bytes into 4 characters. Encoding a piece
        # whose length is not a multiple of 3 emits "=" padding, which is only
        # valid at the very end of the data, so the remainder is carried over
        # to the next read instead of being encoded now.
        cut = len(chunk) - len(chunk) % 3
        outfile.write(base64.b64encode(chunk[:cut]).decode('utf-8'))
        leftover = chunk[cut:]

    if leftover:
        outfile.write(base64.b64encode(leftover).decode('utf-8'))


def _write_attachment(attachment, outfile, chunk_size):
    """Write one attachment as a JSON object, streaming its file content."""
    # Write all the data except the serialized file
    attachment_serializer = serializers.AttachmentExportSerializer(instance=attachment)
    attachment_serializer.fields.pop("attached_file")
    dumped_value = json.dumps(attachment_serializer.data)
    # Drop the closing "}" so the file entry can be appended to the same object.
    dumped_value = dumped_value[:-1] + ',\n "attached_file":{\n "data":"'
    outfile.write(dumped_value)

    # We write the attached file by chunks so the memory used is not increased
    attachment_file = attachment.attached_file
    with default_storage.open(attachment_file.name) as f:
        _write_base64(f, outfile, chunk_size)

    # json.dumps quotes and escapes the file name, so names containing quotes
    # or backslashes cannot break the generated document.
    file_name = json.dumps(os.path.basename(attachment_file.name))
    outfile.write('", \n "name":{}}}\n}}'.format(file_name))


def _write_items_with_attachments(project, field, field_name, outfile, chunk_size):
    """Write a list field item by item, each item followed by its attachments."""
    value = get_component(project, field_name)
    outfile.write('"{}": [\n'.format(field_name))

    # The attachments are written by hand below, so the field is taken out of
    # the item serializer to keep it from being rendered with each item.
    attachments_field = field.fields.pop("attachments", None)
    if attachments_field:
        attachments_field.initialize(parent=field, field_name="attachments")

    item_separator = ""
    for item in value.iterator():
        # The separator is empty for the first element only
        outfile.write(item_separator)
        item_separator = ",\n"

        dumped_value = json.dumps(field.to_native(item))
        # Drop the closing "}" so the attachments list can be appended.
        outfile.write(dumped_value[:-1] + ',\n "attachments": [\n')

        attachment_separator = ""
        for attachment in item.attachments.iterator():
            outfile.write(attachment_separator)
            attachment_separator = ",\n"
            _write_attachment(attachment, outfile, chunk_size)

        outfile.write(']}')
        outfile.flush()
        # NOTE(review): presumably forces per-item data to be released before
        # the next item is rendered -- confirm it is still needed.
        gc.collect()

    outfile.write(']')


def render_project(project, outfile, chunk_size = 8192):
    """
    Write the JSON export of ``project`` to ``outfile``.

    The document is written field by field instead of being built in memory.
    The "wiki_pages", "user_stories", "tasks" and "issues" fields are written
    one item at a time, and the content of every attached file is read in
    pieces of ``chunk_size`` bytes and written base64-encoded.

    ``outfile`` must be a text file-like object providing ``write`` and
    ``flush``. ``chunk_size`` may be any positive number of bytes.
    """
    serializer = serializers.ProjectExportSerializer(project)
    outfile.write('{\n')

    field_separator = ""
    for field_name, field in serializer.fields.items():
        # The separator is empty for the first element only
        outfile.write(field_separator)
        field_separator = ",\n"

        field.initialize(parent=serializer, field_name=field_name)

        # These four "special" fields have attachments so we use them in a special way
        if field_name in ("wiki_pages", "user_stories", "tasks", "issues"):
            _write_items_with_attachments(project, field, field_name, outfile, chunk_size)
        else:
            value = field.field_to_native(project, field_name)
            outfile.write('"{}": {}'.format(field_name, json.dumps(value)))

    outfile.write('}\n')
def store_project(data): def store_project(data):

View File

@ -29,25 +29,26 @@ from djmail.template_mail import MagicMailBuilder, InlineCSSTemplateMail
from taiga.celery import app from taiga.celery import app
from .service import project_to_dict from .service import render_project
from .dump_service import dict_to_project from .dump_service import dict_to_project
from .renderers import ExportRenderer from .renderers import ExportRenderer
logger = logging.getLogger('taiga.export_import') logger = logging.getLogger('taiga.export_import')
import resource
@app.task(bind=True) @app.task(bind=True)
def dump_project(self, user, project): def dump_project(self, user, project):
mbuilder = MagicMailBuilder(template_mail_cls=InlineCSSTemplateMail) mbuilder = MagicMailBuilder(template_mail_cls=InlineCSSTemplateMail)
path = "exports/{}/{}-{}.json".format(project.pk, project.slug, self.request.id) path = "exports/{}/{}-{}.json".format(project.pk, project.slug, self.request.id)
storage_path = default_storage.path(path)
try: try:
content = ExportRenderer().render(project_to_dict(project), renderer_context={"indent": 4})
content = content.decode('utf-8')
content = ContentFile(content)
default_storage.save(path, content)
url = default_storage.url(path) url = default_storage.url(path)
with default_storage.open(storage_path, mode="w") as outfile:
render_project(project, outfile)
except Exception: except Exception:
ctx = { ctx = {
"user": user, "user": user,

View File

@ -910,7 +910,6 @@ def test_valid_milestone_import(client):
assert response.data["watchers"] == [user_watching.email] assert response.data["watchers"] == [user_watching.email]
def test_milestone_import_duplicated_milestone(client): def test_milestone_import_duplicated_milestone(client):
user = f.UserFactory.create() user = f.UserFactory.create()
project = f.ProjectFactory.create(owner=user) project = f.ProjectFactory.create(owner=user)

View File

@ -15,21 +15,29 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
import pytest import pytest
import io
from .. import factories as f from .. import factories as f
from taiga.export_import.service import project_to_dict from taiga.base.utils import json
from taiga.export_import.service import render_project
pytestmark = pytest.mark.django_db pytestmark = pytest.mark.django_db
def test_export_issue_finish_date(client):
    """An exported issue carries its finished date as a full timestamp string."""
    issue = f.IssueFactory.create(finished_date="2014-10-22")

    # The export is rendered into an in-memory text buffer and parsed back.
    output = io.StringIO()
    render_project(issue.project, output)
    project_data = json.loads(output.getvalue())

    finish_date = project_data["issues"][0]["finished_date"]
    assert finish_date == "2014-10-22T00:00:00+0000"
def test_export_user_story_finish_date(client):
    """An exported user story carries its finish date as a full timestamp string."""
    story = f.UserStoryFactory.create(finish_date="2014-10-22")

    # Render the export into memory, then read it back as JSON.
    buffer = io.StringIO()
    render_project(story.project, buffer)
    exported = json.loads(buffer.getvalue())

    exported_story = exported["user_stories"][0]
    assert exported_story["finish_date"] == "2014-10-22T00:00:00+0000"