for 루프 안의 Django ORM 때문에 로딩이 너무 오래 걸리는 페이지가 있었다. 이 페이지는 클라이언트와 서버 모두 총체적 난국 이었다. 클라이언트는 pagination 처리 조차 되어 있지 않아, 한 두 번의 api 통신으로 모든 데이터를 가져오는 구조다.
client, server 모두 고쳐야 했지만, CTO님은 일단 resource가 한정적이니, 서버만 일부 고쳐서 개선해보자고 하여 그렇게 진행하게 된 작업.
문제의 for loop 함수
def cuttingContents(contents):
result = []
for c in contents:
data = dict({
'id': c.id,
'created_at': localtime(c.created_at).strftime('%Y-%m-%d'),
'subject': c.subject,
'content_type': c.content_type,
'teacher': None,
'tag': c.tag,
'count': 0
})
categories = c.categories.filter(parent__isnull=False)
if c.content_type == Content.TYPE_VIDEO_LECTURE:
if c.teachers:
data['teacher'] = [ {'id': o.id, 'name': o.name} for o in c.teachers.all() ]
data['count'] = c.extra.lectures.filter(is_deleted=False).count()
categories = c.categories.filter(id__in=total_video_categories)
data['review_count'] = c.public_reviews.count()
data['review_score'] = c.review_avg_score
view_count_date = (timezone.now() - timedelta(days=180)).strftime('%Y-%m-%d 00:00:00')
data['view_count'] = VideoViewCount.objects.filter(video=c.video.id, created_at__gte=view_count_date).count()
weekago = (timezone.now() - timedelta(days=3)).strftime('%Y-%m-%d 00:00:00')
data['look_count'] = VideoViewCount.objects.filter(video=c.video.id, created_at__gte=weekago).count()
if c.content_type == Content.TYPE_EXAM:
categories = [c.exam.company]
data['categories'] = [ {'id': o.id, 'name': o.name} for o in categories ]
result.append(data)
return result
해당 페이지 api의 모든 로직을 변경할 필요는 없었다. 문제의 for loop 함수만 잘 고친다면 말이다.
새로 만든 함수
def get_data_of_contents(contents, user_content_id, content_type):
exclude_added_usercontents = """
AND NOT ("content_content"."id" IN (SELECT U0."id" FROM "content_content" U0
INNER JOIN "payment_usercontent" U1 ON (U0."id" = U1."content_id")
WHERE (U1."parent_id" = %s
AND U1."expired_at" > %s AT TIME ZONE 'utc' AT TIME ZONE 'kst')))"""
current_time = timezone.now()
base_query = contents.query
query = """
WITH "base_table" AS (
{base_query} {exclude_added_usercontents}
)
SELECT "base_table"."id", "base_table"."subject", "base_table"."content_type",
"base_table"."tag", ("base_table"."created_at" AT TIME ZONE 'Asia/Seoul')::date AS "created_date", """\
.format(base_query=base_query, exclude_added_usercontents=exclude_added_usercontents)
if content_type == Content.TYPE_VIDEO_LECTURE:
review_count = """
SELECT COUNT(*) AS "review_count"
FROM "content_content"
INNER JOIN "review_userreview" ON ("content_content"."id" = "review_userreview"."content_id")
WHERE ("review_userreview"."is_public" = True AND "review_userreview"."content_id" = "base_table"."id")"""
review_score = """
SELECT AVG("review_userreview"."score") AS "review_score"
FROM "content_content"
INNER JOIN "review_userreview" ON ("content_content"."id" = "review_userreview"."content_id")
WHERE ("review_userreview"."is_public" = True AND "review_userreview"."content_id" = "base_table"."id")"""
lecture_count = """
SELECT COUNT(*) as "count"
FROM "content_content"
INNER JOIN "video_videocontent" ON ("content_content"."id" = "video_videocontent"."content_id")
INNER JOIN "video_videocontentlecture" ON ("video_videocontent"."id" = "video_videocontentlecture"."video_content_id")
WHERE ("video_videocontentlecture"."is_deleted" = False AND "video_videocontent"."content_id" = "base_table"."id") """
number_of_view = """
SELECT COUNT(*) as "view_count"
FROM "content_content"
INNER JOIN "video_videocontent" ON ("content_content"."id" = "video_videocontent"."content_id")
INNER JOIN "video_videoviewcount" ON ("video_videocontent"."id" = "video_videoviewcount"."video_id")
WHERE "video_videoviewcount"."created_at" >= %s AT TIME ZONE 'utc' AT TIME ZONE 'kst'
AND "video_videocontent"."content_id" = "base_table"."id" """
number_of_view_per_week = """
SELECT COUNT(*) as "look_count"
FROM "content_content"
INNER JOIN "video_videocontent" ON ("content_content"."id" = "video_videocontent"."content_id")
INNER JOIN "video_videoviewcount" ON ("video_videocontent"."id" = "video_videoviewcount"."video_id")
WHERE "video_videoviewcount"."created_at" >= %s AT TIME ZONE 'utc' AT TIME ZONE 'kst'
AND "video_videocontent"."content_id" = "base_table"."id" """
categories = """
SELECT ARRAY_AGG("content_contentcategory"."name") AS "categories"
FROM "content_content"
INNER JOIN "content_content_categories" ON ("content_content"."id" = "content_content_categories"."content_id")
INNER JOIN "content_contentcategory" ON ("content_content_categories"."contentcategory_id" = "content_contentcategory"."id")
WHERE ("content_content_categories"."contentcategory_id" IN %s
AND "content_content_categories"."content_id" = "base_table"."id") """
teachers = """
SELECT ARRAY_AGG("content_content_teachers"."teacher_id") AS "teacher"
FROM "content_content"
INNER JOIN "content_content_teachers" ON ("content_content"."id" = "content_content_teachers"."content_id")
WHERE "content_content_teachers"."content_id" = "base_table"."id" """
query += """
({review_count}), ({review_score}), ({lecture_count}),
({number_of_view}), ({number_of_view_per_week}), ({categories}), ({teachers})
FROM "base_table"
GROUP BY "base_table"."id", "base_table"."created_at",
"base_table"."subject", "base_table"."content_type", "base_table"."tag"
ORDER BY "base_table"."created_at" DESC; """\
.format(
review_count=review_count, review_score=review_score, lecture_count=lecture_count,
number_of_view=number_of_view, number_of_view_per_week=number_of_view_per_week,
categories=categories, teachers=teachers)
view_count_date = (current_time - timedelta(days=180)).strftime('%Y-%m-%d 00:00:00')
weekago = (current_time - timedelta(days=7)).strftime('%Y-%m-%d 00:00:00')
total_video_categories = 12, 6, 11, 7, 32, 24, 31, 23, 30, 22, 29, 21, 28, 20, 19, 27, 209, 3
params = [user_content_id, current_time, view_count_date, weekago, total_video_categories]
else:
if content_type == Content.TYPE_EXAM:
categories = """
SELECT ARRAY_AGG("content_company"."name") AS "categories"
FROM "content_content"
INNER JOIN "exam_examcontent" ON ("content_content"."id" = "exam_examcontent"."content_id")
INNER JOIN "content_company" ON ("exam_examcontent"."company_id" = "content_company"."id")
WHERE ("exam_examcontent"."content_id" = "base_table"."id") """
elif content_type == Content.TYPE_DOCUMENT:
categories = """
SELECT ARRAY_AGG("content_contentcategory"."name") AS "categories"
FROM "content_content"
INNER JOIN "content_content_categories" ON ("content_content"."id" = "content_content_categories"."content_id")
INNER JOIN "content_contentcategory" ON ("content_content_categories"."contentcategory_id" = "content_contentcategory"."id")
WHERE ("content_contentcategory"."key" IN ('job', 'industry', 'introduce', 'specialism', 'company')
AND "content_contentcategory"."service" = 1
AND "content_contentcategory"."content_type" = 2
AND "content_contentcategory"."parent_id" IS NOT NULL
AND "content_content_categories"."content_id" = "base_table"."id") """
params = [user_content_id, current_time]
query += """
({categories}) FROM "base_table"
GROUP BY "base_table"."id", "base_table"."created_at",
"base_table"."subject", "base_table"."content_type", "base_table"."tag"
ORDER BY "base_table"."created_at" DESC; """\
.format(categories=categories)
with connection.cursor() as cursor:
cursor.execute(query, params)
result = dictfetchall(cursor)
return result
문제의 함수는 또 다른 ORM의 결과값인 QuerySet을 인자값으로 받고 있었다.
Django Subquery()
를 썼다면
인자값으로 받은 QuerySet
과 Subquery
를 결합하여 더 짧고 가시적인 코드를 짤 수 있었을 것이다.
https://docs.djangoproject.com/en/2.1/ref/models/expressions/#subquery-expressions
허나, Subquery()
는 Django 1.11 버젼부터 도입된 거라 사용할 수 없었다.
별 수 없이 sql문을 짜기로 방향을 틀었다.
base_query = contents.query
와 같이
인자값으로 받은 QuerySet
의 query
를 호출하여 장고 쿼리셋을 sql문으로 바꾸기로 했다.
QuerySet에 포함됐던 일부 로직을 제거하고, sql문에 작성했다.
문자열 포맷팅으로 query를 완성해갔다.
Postgresql ARRAY_AGG
이번 작업으로 postgresql의 ARRAY_AGG
기능을 알 수 있었다.
이 기능으로 sql의 결과를 array로 받을 수 있다.
이는 장고에도 구현되어 있다.
https://docs.djangoproject.com/en/3.0/ref/contrib/postgres/aggregates/#arrayagg
Combining multiple aggregations
count든 average든 하나의 쿼리에서 하지 않고, 여러 개의 서브 쿼리로 만든 이유는?
여러 개의 aggregation을 annotate()
를 사용하여 하나의 쿼리에 담으면 결과에 문제가 생길 수 있다.
Combining multiple aggregations with annotate() will yield the wrong results because joins are used instead of subqueries
https://docs.djangoproject.com/en/2.1/topics/db/aggregation/#combining-multiple-aggregations