CSVDownloader
์๊ฐ๋ ์ง: 2025-06-08
CSV ๋ค์ด๋ก๋ ๊ธฐ๋ฅ์ ๊ด๋ฆฌ ์๋น์ค์์ ์์ฃผ ์ฌ์ฉ๋๋ฉฐ, ํํฐ๋ง, ์ ๋ ฌ, ํฌ๋งท ์ ์ด, ๋น๋๊ธฐ ์ฒ๋ฆฌ, ํ์ผ ์ ๋ก๋ ๋ฑ ๋ณต์กํ ์๊ตฌ์ฌํญ์ด ๋ง๋ค. CSVDownloader๋ ์ด๋ฐ ๋ฐ๋ณต ์์ ์ ๊ณตํตํํ์ฌ ๋น ๋ฅด๊ณ ์์ ์ ์ผ๋ก ์ฒ๋ฆฌํ๊ธฐ ์ํด ๋ง๋ค์ด์ก๋ค.
๋ด๊ฐ ๋ง๋ CSVDownloader๋ ์๋์ ๊ธฐ๋ฅ์ ์ ๊ณตํ๋ค.
CSVDownloader
๋ ์ค๋ฌด์์ ํ์ํ CSV ๋ค์ด๋ก๋ ๊ธฐ๋ฅ์ ํ๋์ ํด๋์ค๋ก ๊น๋ํ๊ฒ ์บก์ํํ ์ ํธ๋ฆฌํฐ์ด๋ค.
Django + DRF ๊ธฐ๋ฐ์์ ๊ด๋ฆฌ ํ๋ฉด, ์ด๋๋ฏผํด, ๋ฐฑ์คํผ์ค ๋ค์ด๋ก๋ ๊ธฐ๋ฅ์ ๊ตฌํํ ๋ ๋น ๋ฅด๊ฒ ์ ์ฉํ ์ ์๊ณ , S3 ์ ๋ก๋๊น์ง ์๋์ผ๋ก ์ฒ๋ฆฌํด์ฃผ๋ ๊ตฌ์กฐ๋ก ๋๋ ๋ฐ์ดํฐ ์ฒ๋ฆฌ๋ ๋ฌด๋ฆฌ ์์ด ๋์ ๊ฐ๋ฅํ๋ค.
CSVDownloader
๋ ๋ค์ ์ปดํฌ๋ํธ๋ค๋ก ๊ตฌ์ฑ๋ฉ๋๋ค:
queryset
, filterset_class
, serializer_class
๊ธฐ๋ฐ์ผ๋ก ๋์ ๋ฐ์ดํฐ ์ค์ import csv
from io import StringIO
from typing import List
from django.conf import settings
from django.http import StreamingHttpResponse
from django.utils import timezone
from rest_framework.exceptions import ValidationError
def set_progress(task_id, progress, total):
    """Store the progress of a Celery task in the cache.

    The value is written under the ``CacheKey.CELERY_TASK_PROGRESS`` key for
    ``task_id`` as ``{"progress": progress, "total": total}`` with that key's
    configured timeout. (The original note says the cache backend is Redis.)

    Progress reporting is best-effort: any failure is logged as a warning and
    swallowed so that a cache outage never aborts the export itself.

    Args:
        task_id: Identifier of the Celery task being tracked.
        progress: Current progress value.
        total: Value that ``progress`` is measured against.
    """
    try:
        cache.set(
            CacheKey.CELERY_TASK_PROGRESS.get(task_id=task_id),
            {"progress": progress, "total": total},
            CacheKey.CELERY_TASK_PROGRESS.timeout,
        )
    except Exception as e:
        # Deliberately broad: progress tracking must never break the caller.
        logger.warning(f"비동기 처리 set_progress 오류: {e}")
def upload_s3(temp_path: str, upload_path: str, bucket: str) -> str:
    """Upload a local file to S3 and return a presigned download URL.

    Args:
        temp_path: Path of the local file to upload.
        upload_path: Object key to store the file under.
        bucket: Name of the destination bucket.

    Returns:
        A presigned "get" URL for the uploaded object, valid for 24 hours.

    Raises:
        Exception: Any error from the upload or URL generation is logged and
            re-raised unchanged.
    """
    try:
        s3_client = AWSS3ClientService.get_client()
        s3_client.upload_file(temp_path, bucket, upload_path)
        return AWSS3ClientService.generate_presigned_url(
            "get",
            bucket,
            upload_path,
            expires_in=24 * 60 * 60,  # 24 hours, in seconds
        )
    except Exception as e:
        logger.error(f"s3 업로드 실패 {e}")
        raise
class CSVDownloader:
    """Reusable CSV export helper built on Django, DRF and (optionally) Celery.

    How to use:
      - Subclass it and override the class attributes you need, or pass the
        same names as keyword arguments to the constructor.
      - Call ``get_result_as_streaming_response()`` to stream the CSV, or
        ``get_result_as_download_url()`` to upload it to S3 and get a URL.
    """

    filterset_class = None  # FilterSet class applied to the queryset
    query_params = None  # GET query parameters, handed to the filterset as-is
    queryset = None  # Django ORM QuerySet that supplies the rows
    ordering_fields = None  # ordering, e.g. ["-id", "-created_at"]; default "-pk"
    serializer_class = None  # DRF serializer class that shapes each CSV row
    value_convert_map = {  # stringified cell value -> text written to the CSV
        "None": "-",
        "True": "Y",
        "False": "N",
    }

    # Miscellaneous settings
    allow_row_limit = 100000  # maximum number of rows; falsy disables the check
    s3_bucket = settings.AWS_STORAGE_PRIVATE_BUCKET_NAME
    s3_path = None  # key prefix in the bucket; defaults to download/<YYYYMMDD>/csv
    file_prefix = "csv_download"  # file name prefix
    celery_task_id = None  # task id used for progress reporting via set_progress
    celery_progress_row_offset = 50  # report progress every N processed rows

    def __init__(self, *args, **kwargs):
        """Configure the downloader and prepare the filtered, ordered queryset.

        Every option falls back to the class attribute of the same name when
        the keyword argument is not given. ``queryset`` is mandatory.

        Raises:
            ValidationError: If ``queryset`` is missing, or the filtered
                queryset exceeds ``allow_row_limit``.
        """
        self.user_id = kwargs.get("user_id")
        self.query_params = kwargs.get("query_params", {})
        self.filterset_class = kwargs.get("filterset_class", self.filterset_class)
        self.serializer_class = kwargs.get("serializer_class", self.serializer_class)
        self.ordering_fields = kwargs.get("ordering_fields", self.ordering_fields)
        self.value_convert_map = kwargs.get("value_convert_map", self.value_convert_map)
        self.allow_row_limit = kwargs.get("allow_row_limit", self.allow_row_limit)
        self.file_prefix = kwargs.get("file_prefix", self.file_prefix)
        self.s3_bucket = kwargs.get("s3_bucket", self.s3_bucket)

        # An explicit keyword argument wins even when a subclass sets s3_path.
        self.s3_path = kwargs.get("s3_path", self.s3_path)
        if self.s3_path is None:
            today = timezone.now().strftime("%Y%m%d")
            self.s3_path = f"download/{today}/csv"

        queryset = kwargs.get("queryset")
        if queryset is None:
            raise ValidationError("queryset is required")
        self.queryset = self.get_queryset(queryset)

        self.celery_task_id = kwargs.get("celery_task_id", self.celery_task_id)
        if self.celery_task_id:
            # Publish an initial 1% so pollers can see the task has started.
            set_progress(self.celery_task_id, 1, 100)

    def get_queryset(self, queryset):
        """Return ``queryset`` filtered, size-checked and ordered.

        Raises:
            ValidationError: If the filtered row count exceeds
                ``allow_row_limit``.
        """
        if self.filterset_class:
            filterset = self.filterset_class(self.query_params, queryset=queryset)
            queryset = filterset.qs

        requested_record_count = queryset.count()
        if self.allow_row_limit and requested_record_count > self.allow_row_limit:
            raise ValidationError(
                f"다운로드 한도 초과: 최대 {self.allow_row_limit} 개 까지 조회 가능합니다. "
                f"(요청 레코드 수: {requested_record_count})"
            )

        if self.ordering_fields:
            return queryset.order_by(*self.ordering_fields)
        return queryset.order_by("-pk")

    def get_result_as_download_url(self) -> str:
        """Write the CSV to a temp file, upload it to S3 and return its URL.

        The temporary file is removed whether or not the upload succeeds.
        """
        import os  # local import keeps this block self-contained

        # Resolve the name once so the local file and the S3 key always match.
        filename = self.get_filename()
        server_file_path = os.path.join(settings.TMP_FILE_DIR, filename)
        s3_upload_path = f"{self.s3_path}/{filename}"

        csv_content = self.generate_csv()
        try:
            # utf-8-sig writes a BOM at the start of the file.
            with open(server_file_path, "w", newline="", encoding="utf-8-sig") as f:
                f.write(csv_content.getvalue())
            return upload_s3(server_file_path, s3_upload_path, self.s3_bucket)
        finally:
            try:
                os.remove(server_file_path)
            except FileNotFoundError:
                # Nothing to clean up: the file was never created.
                pass

    def get_result_as_streaming_response(self) -> StreamingHttpResponse:
        """Return the CSV as a streaming HTTP attachment response."""
        response = StreamingHttpResponse(
            (line + "\n" for line in self.generate_csv_lines()),  # lazy generator
            content_type="text/csv",
        )
        response["Content-Disposition"] = f'attachment; filename="{self.get_filename()}"'
        return response

    def generate_csv_lines(self):
        """Yield the CSV one line at a time, header first, without newlines.

        Cells are quoted exactly like ``generate_csv`` (``QUOTE_ALL``), so
        commas, quotes and line breaks inside a value do not corrupt the file.
        Rows are read with ``QuerySet.iterator()``.
        """
        serializer_class = self.get_serializer_class()
        yield self._format_csv_line(self.get_csv_headers())

        # count() instead of len(): len() would load every row into memory.
        total = self.queryset.count() if self.celery_task_id else 0
        for idx, item in enumerate(self.queryset.iterator()):
            yield self._format_csv_line(self.get_csv_row(serializer_class(item).data))
            self._report_progress(idx, total)

    def generate_csv(self) -> StringIO:
        """Build the whole CSV in memory and return it rewound to the start."""
        csv_file = StringIO()
        writer = csv.writer(csv_file, quoting=csv.QUOTE_ALL)
        writer.writerow(self.get_csv_headers())

        serializer_class = self.get_serializer_class()
        # len() evaluates the queryset; the loop below then reuses its cache.
        total = len(self.queryset)
        for idx, item in enumerate(self.queryset):
            writer.writerow(self.get_csv_row(serializer_class(item).data))
            self._report_progress(idx, total)

        csv_file.seek(0)
        return csv_file

    def _report_progress(self, idx, total):
        """Publish a percentage every ``celery_progress_row_offset`` rows."""
        if not self.celery_task_id or not total:
            return
        if idx % self.celery_progress_row_offset == 0:
            set_progress(self.celery_task_id, int(idx / total * 100), 100)

    @staticmethod
    def _format_csv_line(row) -> str:
        """Render one row as a fully quoted CSV line with no line terminator."""
        buffer = StringIO()
        csv.writer(buffer, quoting=csv.QUOTE_ALL, lineterminator="").writerow(row)
        return buffer.getvalue()

    def get_serializer_class(self):
        """Return the configured serializer class.

        Raises:
            NotImplementedError: If no ``serializer_class`` is configured.
        """
        if self.serializer_class is None:
            raise NotImplementedError("serializer_class is required")
        return self.serializer_class

    def get_filename(self):
        """Return ``<file_prefix>_<YYYYMMDD_HHMMSS>.csv`` for the current time."""
        timestamp = timezone.now().strftime("%Y%m%d_%H%M%S")
        return f"{self.file_prefix}_{timestamp}.csv"

    def get_csv_headers(self) -> List[str]:
        """Return one header per serializer field.

        Uses the field label when present, then the model field's
        ``verbose_name`` if the field exposes ``Meta.model``, and finally the
        field name itself.
        """
        serializer = self.get_serializer_class()()

        headers = []
        for field_name, field in serializer.fields.items():
            if field.label is not None:
                headers.append(str(field.label))
            elif hasattr(field, "Meta") and hasattr(field.Meta, "model"):
                model_field = field.Meta.model._meta.get_field(field_name)
                headers.append(str(model_field.verbose_name))
            else:
                headers.append(field_name)
        return headers

    def get_csv_row(self, item) -> List[str]:
        """Return the values of ``item`` as strings, applying value_convert_map.

        Each value is stringified first, so ``None``/``True``/``False`` match
        the ``"None"``/``"True"``/``"False"`` keys of the default map.
        """
        convert = self.value_convert_map
        return [convert.get(text, text) for text in map(str, item.values())]
from myapp.csv_downloader import CSVDownloader
def download_view(request):
    """Example view: stream MyModel rows, filtered by the GET parameters, as CSV."""
    options = {
        "user_id": request.user.id,
        "queryset": MyModel.objects.all(),
        "filterset_class": MyModelFilter,
        "serializer_class": MyModelSerializer,
        "query_params": request.GET,
    }
    return CSVDownloader(**options).get_result_as_streaming_response()
class CMSMissionRewardLogCSVDownloader(CSVDownloader):
    """CSVDownloader preset for exporting mission reward logs."""

    # How rows are selected and rendered.
    serializer_class = CMSMissionRewardLogSerializerForCSV
    filterset_class = CMSMissionRewardLogFilter

    # Where the generated file goes and what it is called.
    s3_bucket = settings.AWS_STORAGE_PRIVATE_BUCKET_NAME
    file_prefix = "cms_mission_reward_log_csv_download"
@shared_task()
def mission_reward_log_csv_download(query_params: dict):
    """Celery task: export mission reward logs to CSV and return the file URL.

    Args:
        query_params: Filter parameters forwarded to the downloader.

    Returns:
        A dict of the form ``{"file_url": <download URL>}``.
    """
    reward_logs = MissionRewardLog.objects.all()
    reward_logs = reward_logs.select_related("user", "mission", "reward")
    reward_logs = reward_logs.order_by("-id")

    exporter = CMSMissionRewardLogCSVDownloader(
        query_params=query_params,
        queryset=reward_logs,
        celery_task_id=current_task.request.id,
    )
    return {"file_url": exporter.get_result_as_download_url()}
์ด๊ธฐํ
allow_row_limit
์ด๊ณผ ์ ์ค๋ฅ ๋ฐ์CSV ์์ฑ
serializer_class
๋ฅผ ํตํด ํค๋ ์์ฑTrue
, False
, None
์ ์ปค์คํ
๋ฌธ์์ด๋ก ๋ณํ (value_convert_map
)celery_task_id
๊ฐ ์์ผ๋ฉด ์ผ์ row๋ง๋ค set_progress()
ํธ์ถ๊ฒฐ๊ณผ ๋ฐํ
ํญ๋ชฉ | ์ค๋ช |
---|---|
๋ฒ์ฉ์ฑ | filterset, serializer ์ฃผ์ ๋ฐฉ์์ผ๋ก ์ด๋์๋ ์ฌ์ฉ ๊ฐ๋ฅ |
ํ์ฅ์ฑ | ์งํ๋ฅ ํธ๋ํน, S3 ์ ๋ก๋, ํ์ผ ์ด๋ฆ ๋ฑ ์ปค์คํฐ๋ง์ด์ง ๊ฐ๋ฅ |
์ผ๊ด์ฑ | DRF serializer๋ฅผ ๊ทธ๋๋ก ์ฌ์ฉํ์ฌ API ์๋ต๊ณผ ๋์ผํ ํฌ๋งท ์ ์ง |
์ค๋ฌด ์ต์ ํ | row ์ ํ, label ํค๋, value ๋ณํ ๋ฑ ์ค์ ์ด์์ ์ ํฉ |
ํญ๋ชฉ | ์ค๋ช |
---|---|
๋ฉ๋ชจ๋ฆฌ ๊ธฐ๋ฐ ์ฒ๋ฆฌ | ์ ์ฒด CSV๋ฅผ ๋ฉ๋ชจ๋ฆฌ์ ์ฌ๋ฆฌ๋ ๊ตฌ์กฐ (10๋ง๊ฑด ์ด์ ์ ์ ์ ํ์) |
serializer ์ฑ๋ฅ | ํ๋๊ฐ ๋ณต์กํ๊ฑฐ๋ ์์ญ๋ง ๊ฑด ์ด์์ผ ๊ฒฝ์ฐ ๋๋ ค์ง ์ ์์ |
row ์๋ฌ ์ฒ๋ฆฌ ์์ | ํน์ row ์ง๋ ฌํ ์๋ฌ ๋ฐ์ ์ ์ ์ฒด ์คํจ ๊ฐ๋ฅ์ฑ |