-
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Queries.sql
126 lines (107 loc) · 4.26 KB
/
Queries.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
drop table if exists basics;
create table basics (
id varchar(15),
title_type varchar(30),
primary_title varchar(500),
original_title varchar(500),
is_adult int,
start_Year int,
end_year int,
runtime_minutes int,
genres varchar(200)
)
drop table if exists episodes;
create table episodes (
id varchar(15),
parent_id varchar(15),
season_number int,
episode_number int
)
drop table if exists ratings;
create table ratings (
id varchar(15),
avg_rating numeric(3, 1),
num_votes int
)
-- GENRES (Helper Table)
create table genres (
genre varchar(20)
)
insert into genres
values ('Action'),('Adventure'),('Animation'),('Biography'),('Comedy'),('Crime'),('Drama')
,('Family'),('Fantasy'),('Film-Noir'),('History'),('Horror'),('Music'),('Musical')
,('Mystery'),('Romance'),('Sci-Fi'),('Sport'),('Thriller'),('War'),('Western')
-- MOVIES and SERIES
select a.id, title_type,
case when title_type in ('movie', 'tvMovie') then 'Movie'
when title_type in ('tvSeries', 'tvMiniSeries') then 'Series'
end as item_type,
original_title, primary_title, start_year, runtime_minutes, genres, avg_rating, num_votes
into movies_series
from source_basics a
join source_ratings b on a.id = b.id
where title_type in ('movie','tvMovie','tvSeries','tvMiniSeries')
create index idx_item_type_movies_series on movies_series (item_type)
-- Series' Episodes
select a.parent_id "Series ID", a.id "Episode ID", a.season_number "Season", a.episode_number "Episode", b.avg_rating "Rating", b.num_votes "# Votes"
, c.original_title, c.primary_title, c.start_year, c.end_year, c.runtime_minutes, c.genres
, d.original_title episode_original_title, d.primary_title episode_title, d.start_year "Episode Year"
into series
from source_episodes a
join source_ratings b on a.id = b.id
join source_basics d on a.id = d.id
join source_basics c on a.parent_id = c.id
create index idx_primary_title_search on series (lower(primary_title) varchar_pattern_ops);
create index idx_primary_title_filter on series (primary_title);
----------------------
------- Cards --------
----------------------
-- Seasons Rating (AVG)
select "Season", avg("Rating") "Rating Average"
from series
where 1 = 1
[[ and {{primary_title}} ]]
[[ and "# Votes" > {{num_votes}} ]]
group by "Season"
-- episodes rating
select case when length(cast("Season" as varchar)) = 1 then case when length(cast("Episode" as varchar)) = 1 then concat('S0', cast("Season" as varchar), 'E0', cast("Episode" as varchar) )
else concat('S0', cast("Season" as varchar), 'E', cast("Episode" as varchar) )
end
else case when length(cast("Episode" as varchar)) = 1 then concat('S', cast("Season" as varchar), 'E0', cast("Episode" as varchar) )
else concat('S', cast("Season" as varchar), 'E', cast("Episode" as varchar) )
end
end as episode_full_name, "Rating"
from series
where "Season" is not Null
and "Episode" is not Null
[[ and {{primary_title}} ]]
[[ and "# Votes" > {{num_votes}} ]]
order by 1
-- episodes ranking
select episode_title "Episode Title", "Season", "Episode", "Rating", "# Votes", concat('https://www.imdb.com/title/', "Episode ID") "Link"
from series
where "Season" is not null
and "Episode" is not null
[[ and {{primary_title}} ]]
[[ and "# Votes" > {{num_votes}} ]]
order by "Rating" desc
-- Movies and Series
select primary_title "Title", item_type "Item Type", genres "Genres", start_year "Year", runtime_minutes "Runtime",
avg_rating "Rating", num_votes "# Votes", concat('https://www.imdb.com/title/', id) "Link"
from movies_series
where 1 = 1
[[ and {{item_type}} ]]
[[ and genres like concat('%', cast((select genre from genres where {{genre}} ) as varchar(20)), '%') ]]
[[ and {{year}} ]]
[[ and {{title}} ]]
[[ and avg_rating > {{rating}} ]]
[[ and num_votes > {{num_votes}} ]]
order by avg_rating desc
-- there are two rows for every record. we delete of them
-- (decided to delete the duplicates on the csv file, because it was more reliable)
-- delete from basics
-- where (ctid::text::point)[1]::int % 2 = 1
-- delete from episodes
-- where (ctid::text::point)[1]::int % 2 = 1
-- delete from ratings
-- where (ctid::text::point)[1]::int % 2 = 1