-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path2011_Census_Data_Anlaysis_SQL.sql
More file actions
295 lines (215 loc) · 9.67 KB
/
2011_Census_Data_Anlaysis_SQL.sql
File metadata and controls
295 lines (215 loc) · 9.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
SELECT * FROM census_1
SELECT * FROM census_2
----1. # Total number of states and union territories in the dataset---
SELECT state from census_1
GROUP BY state;
---2. # SELECTING THE PARTTICULAR STATES TO VIEW THE DATA---
SELECT * FROM census_1
WHERE state IN ('Assam','Punjab')
ORDER BY state;
---3. # FINDING THE TOTAL POPULATION OF INDIA----
SELECT SUM(population) AS Total_Population FROM census_1
---4. # FINDING THE TOTAL POPULATION OF INDIA (STATEWISE)----
SELECT state,SUM(population) AS Total_Population
FROM census_1
GROUP BY state
ORDER BY Total_Population DESC;
---5. # FINDING THE AVERAGE POPULATION GROWTH RATE OF INDIA IN LAST 10 YAERS---
SELECT ROUND(AVG(growth),2) AS avg_growth FROM census_2;
----6. # FINDING THE AVERAGE POPULATION GROWTH RATE OF INDIA(STATEWISE) IN LAST 10 YAERS---
SELECT state,ROUND(AVG(growth),2) AS avg_growth
FROM census_2
GROUP BY state
ORDER BY avg_growth DESC
----7. # FINDING THE AVERAGE POPULATION GROWTH RATE OF INDIA(ASSAM) IN LAST 10 YAERS---
SELECT state,ROUND(AVG(growth),2) AS avg_growth
FROM census_2
WHERE state='Assam'
GROUP BY state;
----8. # FINDING THE AVERAGE SEX RATIO OF INDIA ---
SELECT AVG(sex_ratio) AS avg_sex_ratio FROM census_2;
----9. # FINDING THE AVERAGE SEX RATIO OF INDIA(STATEWISE) ---
SELECT state,AVG(sex_ratio) AS avg_sex_ratio
FROM census_2
GROUP BY state
ORDER BY avg_sex_ratio DESC;
----10. # FINDING THE AVERAGE SEX RATIO OF INDIA(ASSAM) ---
SELECT state,AVG(sex_ratio) AS avg_sex_ratio
FROM census_2
WHERE state='Assam'
GROUP BY state;
----11. # FINDING THE AVERAGE literacy rate OF INDIA ---
SELECT AVG(literacy) AS avg_literacy FROM census_2;
----12. # FINDING THE AVERAGE literacy OF INDIA(STATEWISE) ---
SELECT state,AVG(literacy) AS avg_literacy
FROM census_2
GROUP BY state
ORDER BY avg_literacy DESC;
----13. # FINDING THE AVERAGE literacy OF INDIA(ASSAM) ---
SELECT state,AVG(literacy) AS avg_literacy
FROM census_2
WHERE state='Assam'
GROUP BY state
----14. # FINDING THE AVERAGE literacy OF INDIA(STATEWISE)>90 ---
SELECT state,ROUND(AVG(literacy)) AS avg_literacy
FROM census_2
GROUP BY state
HAVING ROUND(AVG(literacy))>90
ORDER BY avg_literacy DESC;
----15. # FINDING THE TOP 3 STATES HAVING HGHEST AVERAGE POPULATION GROWTH RATE IN INDIA -----
SELECT state,ROUND(AVG(growth)) AS High_avg_growth
FROM census_2
GROUP BY state
ORDER BY High_avg_growth DESC
LIMIT 3;
----16. # FINDING THE TOP 3 STATES HAVING lowest AVERAGE POPULATION GROWTH RATE IN INDIA -----
SELECT state,ROUND(AVG(growth)) AS low_avg_growth
FROM census_2
GROUP BY state
ORDER BY low_avg_growth
LIMIT 3;
----17. # FINDING THE TOP 3 STATES HAVING HGHEST AVERAGE SEX RATIO IN INDIA -----
SELECT state,ROUND(AVG(sex_ratio)) AS High_avg_sex_ratio
FROM census_2
GROUP BY state
ORDER BY High_avg_sex_ratio DESC
LIMIT 3;
----18. # FINDING THE TOP 3 STATES HAVING lowest AVERAGE SEX RATIO IN INDIA -----
SELECT state,ROUND(AVG(sex_ratio)) AS low_avg_sex_ratio
FROM census_2
GROUP BY state
ORDER BY low_avg_sex_ratio ASC
LIMIT 3;
--- 19. #Combine the top 3 states with highest average sex ratio and top 3 states with lowest average sex ratio
--- using CTEs(Common Table Expressions) and UNION/UNION ALL operator----
WITH HighSexRatio AS (
SELECT state, ROUND(AVG(sex_ratio)) AS avg_sex_ratio
FROM census_2
GROUP BY state
ORDER BY avg_sex_ratio DESC
LIMIT 3
),
LowSexRatio AS (
SELECT state, ROUND(AVG(sex_ratio)) AS avg_sex_ratio
FROM census_2
GROUP BY state
ORDER BY avg_sex_ratio ASC
LIMIT 3
)
SELECT 'Top 3 High Sex Ratio' AS category, state, avg_sex_ratio
FROM HighSexRatio
UNION ALL
SELECT 'Top 3 Low Sex Ratio' AS category, state, avg_sex_ratio
FROM LowSexRatio;
--- 20. #Combine the top 3 states with highest average literacy rate and top 3 states with lowest average literacy
--- rate using CTEs(Common Table Expressions) and UNION/UNION ALL operator----
WITH High_Literacy AS (
SELECT state,ROUND(AVG(literacy)) AS avg_literacy
FROM census_2
GROUP BY state
ORDER BY avg_literacy DESC
LIMIT 3
),
Low_Literacy AS (
SELECT state,ROUND(AVG(literacy)) AS avg_literacy
FROM census_2
GROUP BY State
ORDER BY avg_literacy ASC
LIMIT 3
)
SELECT 'High_literacy_rate' AS category, state, avg_literacy
FROM High_Literacy
UNION ALL
SELECT 'Low_literacy_rate' AS category, state, avg_literacy
FROM Low_Literacy;
---- # 21. Finding the name of states which starts with 'a' & 'b'
SELECT DISTINCT state FROM census_2
WHERE state LIKE 'A_sa_' OR state LIKE 'B%';
--- OR (if we want to view unique rows of data in a column, we can use either DINSTINCT or GROUP BY Clause)
SELECT state FROM census_2
WHERE Lower(state) LIKE 'a%' OR Lower(state) LIKE 'b%'
GROUP BY state;
---- # 22. Finding the name of states which ends with 'a'.
SELECT DISTINCT state FROM census_2
WHERE state LIKE '%a';
---- # 23. Finding out the total number of males and females in the different states of India.(in terms of
--- populualtion and sex_ratio/gender ratio)
-----# (We can join two or more tables even if without the foreign key, if we have commom expression or atleast
------ one row of data common in both the table)
--- Since we have only sex_ratio and population columns in our database tables, so we need to do statiscal analysis
--- to derive a formula to find the total no. of males and females in all the states of India.
--- sex_ratio = females/males -------> (1)
--- population = males+female (disregarding other genders) ------>(2)
--- females=population-males -------->(3)
--- substituting (3) into (1)
--- sex_ratio*males=(population-males)
--- sex_ratio*males+males = population
--- population= males(sex_ratio+1)
--- males = population/(sex_ratio+1) -----(Total males) -----(4)
--- substituting (4) into (3)
--- females = population-population/(sex_ratio+1)
--- females = population(1-1/(sex_ratio+1))
--- feamles = (population*(sex_ratio))/(sex_ratio+1) --- (Total females)
SELECT d.state,SUM(d.Total_males)AS T_males,SUM(d.Total_females) AS T_females
FROM
(SELECT c.district,c.state,ROUND(c.population/(c.sex_ratio+1)) Total_males,
ROUND((c.population*(c.sex_ratio))/(c.sex_ratio+1)) Total_females
FROM (SELECT c1.district,c1.state,c1.sex_ratio/1000 AS sex_ratio,c2.population
FROM census_2 AS c1 INNER JOIN census_1 AS c2 ON c1.district=c2.district) AS c)AS d
GROUP BY state
ORDER BY state;
--- # 24. Finding out the total number of literate and illiterate people in different states of India(in terms of
--- populualtion and literacy rate)
---- Here also, we will use literacy rate and population column to find out the desired result
--- literacy_rate = total literate people/population
--- total literate people = literacy_rate*population--------(1) -----(T_L_P)
--- total illitearate people = population - total literate people-----(2)
--- Substituting (1) into (2)
--- total illiterate people = population -(literacy_rate*population)
--- Factor out the common term 'population'
--- total illiterate people = population(1-literacy_rate*1)
--- total illiterate people = (1-literacy_rate)*population -----(T_I_P)
SELECT d.state,SUM(d.Total_literate) Total_literateP,SUM(d.Total_illiterate) Total_illiterateP FROM
(SELECT c.district,c.state,ROUND(c.literacy_rate*c.population) AS Total_literate,
ROUND((1-c.literacy_rate)*c.population) AS Total_illiterate
FROM (SELECT c2.district,c2.state,c2.literacy/100 AS literacy_rate,c1.population
FROM census_1 c1 INNER JOIN census_2 c2
ON c1.district=c2.district ) AS c) AS d
GROUP BY state
ORDER BY 2 DESC, 3 DESC;
--- # 25. Finding the previous census poupulation of states in India(in terms of current populualtion and growth rate)
--- Here also we use statistical analysis to find out the desried result by deriving the formula
--- previous_census+growth*previous_census=population
--- previous_census(1+growth)=population
--- previous_census=population/(1+growth)
SELECT q.state,ROUND(AVG(q.g_r)) AS g_r,SUM(q.prev_census_population) p_c_p,SUM(q.current_census_population) c_c_p FROM
(SELECT p.district,p.state,p.growth_rate*100 AS g_r,
ROUND(p.population/(1+p.growth_rate)) AS prev_census_population,p.population current_census_population FROM
(SELECT c2.district,c2.state,c2.growth/100 growth_rate,c1.population
FROM census_1 c1 INNER JOIN census_2 c2 ON c1.district=c2.district) AS p) AS q
GROUP BY state
ORDER BY 2 DESC;
--- #26. Finding the total previous census poupulation in India(in terms of current populualtion and growth rate)
SELECT SUM(r.p_c_p) prev_census_population, SUM(r.c_c_p) current_census_population FROM
(SELECT q.state,ROUND(AVG(q.g_r)) AS g_r,SUM(q.prev_census_population) p_c_p,SUM(q.current_census_population) c_c_p FROM
(SELECT p.district,p.state,p.growth_rate*100 AS g_r,
ROUND(p.population/(1+p.growth_rate)) AS prev_census_population,p.population current_census_population FROM
(SELECT c2.district,c2.state,c2.growth/100 growth_rate,c1.population
FROM census_1 c1 INNER JOIN census_2 c2 ON c1.district=c2.district) AS p) AS q
GROUP BY state
ORDER BY 2 DESC) AS r;
--- #27. Finding out the population density of different states of India
--- population_density = total population of an area/total land area
SELECT c.state,SUM(c.area_km2) area_km2,SUM(c.poulation_density_per_km2) poulation_density_per_km2 FROM
(SELECT district,state,area_km2, population/area_km2 AS poulation_density_per_km2
FROM census_1)AS c
GROUP BY state
ORDER BY 3 DESC;
--- # 28. Finding top 3 districts from each state of India with highest literacy rate
--- (If we have to find/segregated certain value(s) from a group, we can use ranking functions best on your uses cases)
SELECT n.* FROM
(SELECT district,state,literacy,
RANK() OVER(PARTITION BY state ORDER BY literacy DESC) AS rnk
FROM census_2) n
WHERE rnk IN (1,2,3)
ORDER BY state;