From 520f383449aa475189f13190897a5a22539d2a93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jes=C3=BAs=20Espino?= Date: Wed, 15 Jun 2016 20:06:46 +0200 Subject: [PATCH] [Backport] Add gzip support to exports --- taiga/export_import/api.py | 21 +++++++--- .../management/commands/dump_project.py | 19 +++++++-- taiga/export_import/services/render.py | 32 +++++++------- taiga/export_import/tasks.py | 27 ++++++++---- tests/integration/test_exporter_api.py | 42 ++++++++++++++++++- tests/unit/test_export.py | 4 +- 6 files changed, 109 insertions(+), 36 deletions(-) diff --git a/taiga/export_import/api.py b/taiga/export_import/api.py index bf5cf1a9..4e6f87e0 100644 --- a/taiga/export_import/api.py +++ b/taiga/export_import/api.py @@ -18,6 +18,7 @@ import codecs import uuid +import gzip from django.utils.decorators import method_decorator from django.utils.translation import ugettext as _ @@ -64,16 +65,24 @@ class ProjectExporterViewSet(mixins.ImportThrottlingPolicyMixin, GenericViewSet) project = get_object_or_404(self.get_queryset(), pk=pk) self.check_permissions(request, 'export_project', project) + dump_format = request.QUERY_PARAMS.get("dump_format", None) + if settings.CELERY_ENABLED: - task = tasks.dump_project.delay(request.user, project) - tasks.delete_project_dump.apply_async((project.pk, project.slug, task.id), + task = tasks.dump_project.delay(request.user, project, dump_format) + tasks.delete_project_dump.apply_async((project.pk, project.slug, task.id, dump_format), countdown=settings.EXPORTS_TTL) return response.Accepted({"export_id": task.id}) - path = "exports/{}/{}-{}.json".format(project.pk, project.slug, uuid.uuid4().hex) - storage_path = default_storage.path(path) - with default_storage.open(storage_path, mode="w") as outfile: - services.render_project(project, outfile) + if dump_format == "gzip": + path = "exports/{}/{}-{}.json.gz".format(project.pk, project.slug, uuid.uuid4().hex) + storage_path = default_storage.path(path) + with default_storage.open(storage_path, mode="wb") as outfile: + services.render_project(project, gzip.GzipFile(fileobj=outfile)) + else: + path = "exports/{}/{}-{}.json".format(project.pk, project.slug, uuid.uuid4().hex) + storage_path = default_storage.path(path) + with default_storage.open(storage_path, mode="wb") as outfile: + services.render_project(project, outfile) response_data = { "url": default_storage.url(path) diff --git a/taiga/export_import/management/commands/dump_project.py b/taiga/export_import/management/commands/dump_project.py index 3b3ceaf6..3c6996f3 100644 --- a/taiga/export_import/management/commands/dump_project.py +++ b/taiga/export_import/management/commands/dump_project.py @@ -22,6 +22,7 @@ from taiga.projects.models import Project from taiga.export_import.services import render_project import os +import gzip class Command(BaseCommand): @@ -39,6 +40,13 @@ class Command(BaseCommand): metavar="DIR", help="Directory to save the json files. ('./' by default)") + parser.add_argument("-f", "--format", + action="store", + dest="format", + default="plain", + metavar="[plain|gzip]", + help="Format to the output file plain json or gzipped json. ('plain' by default)") + def handle(self, *args, **options): dst_dir = options["dst_dir"] @@ -56,8 +64,13 @@ class Command(BaseCommand): except Project.DoesNotExist: raise CommandError("Project '{}' does not exist".format(project_slug)) - dst_file = os.path.join(dst_dir, "{}.json".format(project_slug)) - with open(dst_file, "w") as f: - render_project(project, f) + if options["format"] == "gzip": + dst_file = os.path.join(dst_dir, "{}.json.gz".format(project_slug)) + with gzip.GzipFile(dst_file, "wb") as f: + render_project(project, f) + else: + dst_file = os.path.join(dst_dir, "{}.json".format(project_slug)) + with open(dst_file, "wb") as f: + render_project(project, f) print("-> Generate dump of project '{}' in '{}'".format(project.name, dst_file)) diff --git a/taiga/export_import/services/render.py b/taiga/export_import/services/render.py index 19015878..923647a7 100644 --- a/taiga/export_import/services/render.py +++ b/taiga/export_import/services/render.py @@ -34,13 +34,13 @@ from .. import serializers def render_project(project, outfile, chunk_size = 8190): serializer = serializers.ProjectExportSerializer(project) - outfile.write('{\n') + outfile.write(b'{\n') first_field = True for field_name in serializer.fields.keys(): # Avoid writing "," in the last element if not first_field: - outfile.write(",\n") + outfile.write(b",\n") else: first_field = False @@ -56,7 +56,7 @@ def render_project(project, outfile, chunk_size = 8190): value = value.select_related('severity', 'priority', 'type') value = value.prefetch_related('history_entry', 'attachments') - outfile.write('"{}": [\n'.format(field_name)) + outfile.write('"{}": [\n'.format(field_name).encode()) attachments_field = field.fields.pop("attachments", None) if attachments_field: @@ -66,20 +66,20 @@ def render_project(project, outfile, chunk_size = 8190): for item in value.iterator(): # Avoid writing "," in the last element if not first_item: - outfile.write(",\n") + outfile.write(b",\n") else: first_item = False dumped_value = json.dumps(field.to_native(item)) writing_value = dumped_value[:-1]+ ',\n "attachments": [\n' - outfile.write(writing_value) + outfile.write(writing_value.encode()) first_attachment = True for attachment in item.attachments.iterator(): # Avoid writing "," in the last element if not first_attachment: - outfile.write(",\n") + outfile.write(b",\n") else: first_attachment = False @@ -88,7 +88,7 @@ def render_project(project, outfile, chunk_size = 8190): attached_file_serializer = attachment_serializer.fields.pop("attached_file") dumped_value = json.dumps(attachment_serializer.data) dumped_value = dumped_value[:-1] + ',\n "attached_file":{\n "data":"' - outfile.write(dumped_value) + outfile.write(dumped_value.encode()) # We write the attached_files by chunks so the memory used is not increased attachment_file = attachment.attached_file @@ -99,32 +99,32 @@ def render_project(project, outfile, chunk_size = 8190): if not bin_data: break - b64_data = base64.b64encode(bin_data).decode('utf-8') + b64_data = base64.b64encode(bin_data) outfile.write(b64_data) outfile.write('", \n "name":"{}"}}\n}}'.format( - os.path.basename(attachment_file.name))) + os.path.basename(attachment_file.name)).encode()) - outfile.write(']}') + outfile.write(b']}') outfile.flush() gc.collect() - outfile.write(']') + outfile.write(b']') else: value = field.field_to_native(project, field_name) - outfile.write('"{}": {}'.format(field_name, json.dumps(value))) + outfile.write('"{}": {}'.format(field_name, json.dumps(value)).encode()) # Generate the timeline - outfile.write(',\n"timeline": [\n') + outfile.write(b',\n"timeline": [\n') first_timeline = True for timeline_item in get_project_timeline(project).iterator(): # Avoid writing "," in the last element if not first_timeline: - outfile.write(",\n") + outfile.write(b",\n") else: first_timeline = False dumped_value = json.dumps(serializers.TimelineExportSerializer(timeline_item).data) - outfile.write(dumped_value) + outfile.write(dumped_value.encode()) - outfile.write(']}\n') + outfile.write(b']}\n') diff --git a/taiga/export_import/tasks.py b/taiga/export_import/tasks.py index 8ba61645..aa75c257 100644 --- a/taiga/export_import/tasks.py +++ b/taiga/export_import/tasks.py @@ -19,6 +19,7 @@ import datetime import logging import sys +import gzip from django.core.files.storage import default_storage from django.core.files.base import ContentFile @@ -41,14 +42,20 @@ import resource @app.task(bind=True) -def dump_project(self, user, project): - path = "exports/{}/{}-{}.json".format(project.pk, project.slug, self.request.id) - storage_path = default_storage.path(path) - +def dump_project(self, user, project, dump_format): try: + if dump_format == "gzip": + path = "exports/{}/{}-{}.json.gz".format(project.pk, project.slug, self.request.id) + storage_path = default_storage.path(path) + with default_storage.open(storage_path, mode="wb") as outfile: + services.render_project(project, gzip.GzipFile(fileobj=outfile)) + else: + path = "exports/{}/{}-{}.json".format(project.pk, project.slug, self.request.id) + storage_path = default_storage.path(path) + with default_storage.open(storage_path, mode="wb") as outfile: + services.render_project(project, outfile) + url = default_storage.url(path) - with default_storage.open(storage_path, mode="w") as outfile: - services.render_project(project, outfile) except Exception: # Error @@ -75,8 +82,12 @@ def dump_project(self, user, project): @app.task -def delete_project_dump(project_id, project_slug, task_id): - default_storage.delete("exports/{}/{}-{}.json".format(project_id, project_slug, task_id)) +def delete_project_dump(project_id, project_slug, task_id, dump_format): + if dump_format == "gzip": + path = "exports/{}/{}-{}.json.gz".format(project_id, project_slug, task_id) + else: + path = "exports/{}/{}-{}.json".format(project_id, project_slug, task_id) + default_storage.delete(path) ADMIN_ERROR_LOAD_PROJECT_DUMP_MESSAGE = _(""" diff --git a/tests/integration/test_exporter_api.py b/tests/integration/test_exporter_api.py index c8727ae8..5ea4dd0e 100644 --- a/tests/integration/test_exporter_api.py +++ b/tests/integration/test_exporter_api.py @@ -53,6 +53,24 @@ def test_valid_project_export_with_celery_disabled(client, settings): assert response.status_code == 200 response_data = response.data assert "url" in response_data + assert response_data["url"].endswith(".json") + + +def test_valid_project_export_with_celery_disabled_and_gzip(client, settings): + settings.CELERY_ENABLED = False + + user = f.UserFactory.create() + project = f.ProjectFactory.create(owner=user) + f.MembershipFactory(project=project, user=user, is_admin=True) + client.login(user) + + url = reverse("exporter-detail", args=[project.pk]) + + response = client.get(url+"?dump_format=gzip", content_type="application/json") + assert response.status_code == 200 + response_data = response.data + assert "url" in response_data + assert response_data["url"].endswith(".gz") def test_valid_project_export_with_celery_enabled(client, settings): @@ -72,7 +90,29 @@ def test_valid_project_export_with_celery_enabled(client, settings): response_data = response.data assert "export_id" in response_data - args = (project.id, project.slug, response_data["export_id"],) + args = (project.id, project.slug, response_data["export_id"], None) + kwargs = {"countdown": settings.EXPORTS_TTL} + delete_project_dump_mock.apply_async.assert_called_once_with(args, **kwargs) + + +def test_valid_project_export_with_celery_enabled_and_gzip(client, settings): + settings.CELERY_ENABLED = True + + user = f.UserFactory.create() + project = f.ProjectFactory.create(owner=user) + f.MembershipFactory(project=project, user=user, is_admin=True) + client.login(user) + + url = reverse("exporter-detail", args=[project.pk]) + + #delete_project_dump task should have been launched + with mock.patch('taiga.export_import.tasks.delete_project_dump') as delete_project_dump_mock: + response = client.get(url+"?dump_format=gzip", content_type="application/json") + assert response.status_code == 202 + response_data = response.data + assert "export_id" in response_data + + args = (project.id, project.slug, response_data["export_id"], "gzip") kwargs = {"countdown": settings.EXPORTS_TTL} delete_project_dump_mock.apply_async.assert_called_once_with(args, **kwargs) diff --git a/tests/unit/test_export.py b/tests/unit/test_export.py index 546814a8..a8ce775f 100644 --- a/tests/unit/test_export.py +++ b/tests/unit/test_export.py @@ -28,7 +28,7 @@ pytestmark = pytest.mark.django_db def test_export_issue_finish_date(client): issue = f.IssueFactory.create(finished_date="2014-10-22") - output = io.StringIO() + output = io.BytesIO() render_project(issue.project, output) project_data = json.loads(output.getvalue()) finish_date = project_data["issues"][0]["finished_date"] @@ -37,7 +37,7 @@ def test_export_issue_finish_date(client): def test_export_user_story_finish_date(client): user_story = f.UserStoryFactory.create(finish_date="2014-10-22") - output = io.StringIO() + output = io.BytesIO() render_project(user_story.project, output) project_data = json.loads(output.getvalue()) finish_date = project_data["user_stories"][0]["finish_date"]