-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCovid-Project-Queries.sql
166 lines (143 loc) · 6.43 KB
/
Covid-Project-Queries.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
/* Dataset source: https://ourworldindata.org/covid-deaths
The imported file was split in 2 files (covid-deaths and covid-vaccionations) for this project so JOIN statements can be used later on.
The objective of this file is showing the use of different statements such as CTEs, temporary tables, Windows funtions,
Aggregate functions, Joins, etc.
Inspired by 'Alex The Analyst' */
-- COVID DEATHS TABLE
-- 1. Data preview of the columns we are more interested in
SELECT Location, date, total_Cases, new_cases, total_DEaths, population
FROM `covid-deaths`
ORDER BY 1,2;
-- 2. Total Cases vs Total Deaths in Australia (modify location for other countries)
-- Percentage OF likelihood of dying if getting infected.
-- (When analysing using 'total_Cases', note that as time passes by there are more chances of people getting infected MORE THAN ONCE so total_cases != total people infected)
SELECT continent, Location, date, total_cases, new_cases, total_deaths, (total_deaths/total_cases)*100 AS Death_Percentage
FROM `covid-deaths`
-- WHERE Continent != ""
WHERE location = 'Australia'
ORDER BY 1,2 ;
-- 3. Total Cases vs Population per country
-- (When analysing using 'total_Cases', note that as time passes by there are more chances of people getting infected MORE THAN ONCE so total_cases != total people infected)
SELECT Location, date,
population, total_cases,
new_cases, (total_cases/population)*100 AS Infected_Percentage
FROM `covid-deaths`
WHERE LOCATION = 'australia'
ORDER BY 1,2 ;
-- 4. Top 10 Higuest death count vs population per country (not a percentage, just quantity)
SELECT Location, population, MAX(total_deaths) AS Higuest_deaths
FROM `covid-deaths`
WHERE continent != ""
GROUP BY Location, population
ORDER BY Higuest_deaths DESC
LIMIT 10;
-- 5. Top 10 Higuest Death rate PERCENTAGE count vs population per country
SELECT Location, population,
MAX(total_deaths) AS Higuest_deaths,
ROUND(MAX((total_deaths/population)*100),4) AS Death_Percentage
FROM `covid-deaths`
WHERE continent != ""
GROUP BY Location, population
ORDER BY Death_Percentage DESC
LIMIT 10;
-- NOW THE SAME FOR CONTINENTS
-- 6. Top 10 Higuest death COUNT per continent (not a percentage, just quantity)
SELECT location, MAX(total_deaths) AS Higuest_deaths
FROM `covid-deaths`
WHERE continent = ""
AND location in ('Europe', 'South America', 'North America', 'Oceania', 'Asia', 'Africa')
GROUP BY Location, population
ORDER BY Higuest_deaths DESC
LIMIT 10;
-- 7. Top 10 Higuest Death rate PERCENTAGE count vs population per continent
SELECT Location, population,
MAX(total_deaths) AS Higuest_deaths,
ROUND(MAX((total_deaths/population)*100), 4) AS Death_Percentage
FROM `covid-deaths`
WHERE continent = ""
AND location in ('Europe', 'South America', 'North America', 'Oceania', 'Asia', 'Africa')
GROUP BY Location, population
ORDER BY Death_Percentage DESC
LIMIT 10 ;
-- AROUND THE WORLD
-- 8. PER DAY: Cases, new cases, deaths per day (keep in mind a person can get infected again so 'total cases' are counting people who got infected more than once)
-- (this assumes the 'total_cases' column doesn't exist in the original dataset)
SELECT date, SUM(new_cases) AS Total_new_cases,
SUM(new_deaths) AS total_Deaths,
ROUND(SUM(new_Deaths)/SUM(total_CAses)*100, 4) as death_percentage
FROM `covid-deaths`
WHERE continent != ""
GROUP BY date
ORDER BY date;
-- 9. TOTALS: Total cases vs death percentage up until specified "date"
SELECT SUM(new_cases) AS Total_cases,
SUM(new_deaths) AS total_Deaths,
ROUND(SUM(new_Deaths)/SUM(new_CAses)*100, 4) as death_percentage
FROM `covid-deaths`
WHERE continent != "" AND date < "2024-01-01" -- choose reference date
-- GROUP BY date
-- ORDER BY date
;
-- COMBINING COVID-DEATHS TABLE WITH COVID-VACCINATIONS TABLE (using Joins)
-- 10. Adding a Rolling Total using 'New vaccines' to identify total of vaccines (this assumes the 'total_vaccinations' column doesn't exist)
SELECT d.location, d.date, d.population, v.new_vaccinations,
SUM(v.new_vaccinations) OVER (PARTITION BY d.location ORDER BY d.location, d.date) AS rol_tot_vac
FROM `covid-deaths` d
JOIN `covid-vaccinations` v
ON d.location = v.location
AND d.date = v.date
WHERE d.continent != "" AND d.date < '2024-01-01' AND d.location = "Australia"
order by 1,2;
/* 11. Now I want to use the result of the rol_total_vaccin to calculate relation(percentage) with population.
However, rol_tot_vac is created in the SELECT and cannot be used again in the same select. There are 2 options to be able to use it.
IMPORTANT: Total vaccines includes the 2 doses per person and the boosters. It means in one point in history the total
vaccinations quantity could be more than double the population*/
-- A) Using CTE
With CTE (location, date, population, new_vaccinations, rol_tot_vac)
AS
(SELECT d.location, d.date, d.population, v.new_vaccinations,
SUM(v.new_vaccinations) OVER (PARTITION BY d.location ORDER BY d.location, d.date) AS rol_tot_vac
FROM `covid-deaths` d
JOIN `covid-vaccinations` v
ON d.location = v.location
AND d.date = v.date
WHERE d.continent != "" AND d.date < '2024-01-01' AND d.location = "Australia"
ORDER BY 1,2)
SELECT *, (rol_tot_vac/population)*100 AS VacPopPercentage
FROM CTE
;
-- B) Using TEMPORARY TABLES
DROP TABLE IF EXISTS TempTable; -- Useful when practicing with the temporary table multiple times
CREATE TABLE TempTable
(
location varchar(255),
Date date,
population numeric,
new_vaccinations numeric,
rol_total_vac numeric
);
INSERT INTO TempTable -- Add the results of this query to the previously created table
SELECT d.location, d.date, d.population, v.new_vaccinations,
SUM(v.new_vaccinations) OVER (PARTITION BY d.location ORDER BY d.location, d.date) AS rol_tot_vac
FROM `covid-deaths` d
JOIN `covid-vaccinations` v
ON d.location = v.location
AND d.date = v.date
WHERE d.continent != "" AND d.date < '2024-01-01' AND d.location = "Australia"
order by 1,2;
SELECT *, (rol_total_vac/population)*100 -- Using the aggregate value 'rol_tot_vac' to calculate a new value in this query
FROM TempTable
;
-- 12. Create a VIEW that can be used later on as a virtual table
CREATE VIEW PercentagePopVaccin AS
SELECT d.location, d.date, d.population, v.new_vaccinations,
SUM(v.new_vaccinations) OVER (PARTITION BY d.location ORDER BY d.location, d.date) AS rol_tot_vac
FROM `covid-deaths` d
JOIN `covid-vaccinations` v
ON d.location = v.location
AND d.date = v.date
WHERE d.continent != "" AND d.date < '2024-01-01'
order by 1,2;
-- 13. Test View
SELECT *
FROM percentagepopvaccin