-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtight_end_code
More file actions
88 lines (81 loc) · 2.2 KB
/
tight_end_code
File metadata and controls
88 lines (81 loc) · 2.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
-- Load the tight end CSV export into the tight_end table.
-- The file is comma-delimited and its first line is a header row, which is
-- skipped. COPY with a file name is read by the database server, so the path
-- must be reachable from the server process.
COPY tight_end
FROM 'C:\Users\bhamm\OneDrive\Documents\FFdb_raw\mySQL\TE.csv'
WITH (FORMAT csv, HEADER true, DELIMITER ',');
-- Sanity check after the load: number of distinct values in the week, player,
-- games, and season columns (player_count should be 349).
-- Each count gets its own alias; without aliases all four output columns are
-- named "count" and cannot be told apart in the result.
SELECT
    COUNT(DISTINCT week) AS week_count,
    COUNT(DISTINCT player) AS player_count,
    COUNT(DISTINCT games) AS games_count,
    COUNT(DISTINCT season) AS season_count
FROM
    tight_end;
-- Tight ends whose 2023 targets and 2023 carries both sum to zero add no value
-- to the database, so they are removed by the DELETE that follows.
-- Before deleting, list those players: the query returns one row per player,
-- so the number of rows returned is the number of players about to be dropped.
SELECT te.player
FROM tight_end AS te
WHERE te.season = 2023
GROUP BY te.player
HAVING SUM(te.targets) = 0
   AND SUM(te.carries) = 0;
-- The preview query above showed 85 players that needed to be dropped.
-- Dropping players: remove every row (all seasons) that belongs to a player
-- whose 2023 targets and 2023 carries both sum to zero.
WITH inactive_2023 AS (
    -- one row per player with no 2023 targets and no 2023 carries
    SELECT player
    FROM tight_end
    WHERE season = 2023
    GROUP BY player
    HAVING SUM(targets) = 0
       AND SUM(carries) = 0
)
DELETE FROM tight_end AS te
USING inactive_2023 AS gone
WHERE te.player = gone.player;
-- Re-count the distinct players left after the delete; expected result: 264.
-- NOTE(review): an earlier note here said this is 128 fewer than the starting
-- count, but the figures recorded in this script (349 players loaded, 85
-- dropped) give 349 - 85 = 264 -- confirm the starting count.
SELECT COUNT(*) AS count
FROM (
    -- NULL players are excluded to match COUNT(DISTINCT player)
    SELECT DISTINCT player
    FROM tight_end
    WHERE player IS NOT NULL
) AS remaining_players;
-- Add a text column named position (at most 3 characters) to tight_end.
-- The field is meant to help filter the data once the database is modeled.
ALTER TABLE tight_end
    ADD COLUMN position CHARACTER VARYING(3);
-- Assign the position abbreviation 'TE' to the new field.
-- The update intentionally targets the whole table. The WHERE clause only
-- skips rows that already hold 'TE' (IS DISTINCT FROM also matches NULL), so
-- re-running the script does not rewrite every row again.
UPDATE tight_end
SET position = 'TE'
WHERE position IS DISTINCT FROM 'TE';
-- Trim leading and trailing spaces from the text fields.
-- A bare SELECT trim(...) only displays trimmed values and leaves the stored
-- data untouched, so the cleanup is persisted with an UPDATE instead.
-- The WHERE clause limits the write to rows where at least one field actually
-- changes; IS DISTINCT FROM keeps NULL values from being treated as changed.
UPDATE tight_end
SET
    player = TRIM(player),
    team_code = TRIM(team_code),
    position = TRIM(position)
WHERE
    player IS DISTINCT FROM TRIM(player)
    OR team_code IS DISTINCT FROM TRIM(team_code)
    OR position IS DISTINCT FROM TRIM(position);
-- Checking for duplicates by cross-referencing games played per season.
-- Builds a pivot with one row per player and one column per season, using the
-- aggregate FILTER clause to sum games for each season; COALESCE turns a
-- season with no rows for that player into 0.
-- Max games played per season: 17 for 2021-2023 and 16 for 2018-2020, so a
-- player showing more games than that suggests errors in the data.
-- GROUP BY player already yields one row per player, so no DISTINCT is needed.
SELECT
    player,
    COALESCE(SUM(games) FILTER (WHERE season = 2023), 0) AS "2023",
    COALESCE(SUM(games) FILTER (WHERE season = 2022), 0) AS "2022",
    COALESCE(SUM(games) FILTER (WHERE season = 2021), 0) AS "2021",
    COALESCE(SUM(games) FILTER (WHERE season = 2020), 0) AS "2020",
    COALESCE(SUM(games) FILTER (WHERE season = 2019), 0) AS "2019",
    COALESCE(SUM(games) FILTER (WHERE season = 2018), 0) AS "2018"
FROM
    tight_end
GROUP BY
    player
ORDER BY
    "2023" DESC,
    player;