-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathvisualize.py
More file actions
129 lines (115 loc) · 4.4 KB
/
visualize.py
File metadata and controls
129 lines (115 loc) · 4.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import configparser
import os
import logging
from sqlalchemy import create_engine, text
# Configure logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s',
handlers=[
logging.FileHandler('pipeline.log'),
logging.StreamHandler()
]
)
# Read the config file
config = configparser.ConfigParser()
config.read(os.path.join(os.path.dirname(__file__), 'config.ini'))
snowflake_config = config['SNOWFLAKE']
def get_snowflake_connection():
"""Create and return a Snowflake SQLAlchemy engine"""
try:
account = snowflake_config['account'].strip('"\'')
engine = create_engine(
f"snowflake://{snowflake_config['user']}:{snowflake_config['password']}@{account}/"
f"{snowflake_config['database']}/{snowflake_config['schema']}?"
f"warehouse={snowflake_config['warehouse']}&role={snowflake_config['role']}"
)
logging.info("Snowflake engine created successfully")
return engine
except Exception as e:
logging.error(f"Failed to create Snowflake engine: {str(e)}")
raise
def visualize_insights():
"""Generate visualizations from the Gold and Silver layers"""
engine = None
try:
engine = get_snowflake_connection()
logging.info("Starting visualization generation")
os.makedirs("example", exist_ok=True)
# Fetch Gold data
gold_query = text("""
SELECT
GENDER,
DEPRESSION_RATE,
SUICIDAL_THOUGHTS_RATE,
AVG_CGPA,
AVG_ACADEMIC_PRESSURE
FROM GOLD_STUDENT_INSIGHTS
""")
with engine.connect() as conn:
df = pd.read_sql(gold_query, conn)
df.columns = df.columns.str.lower()
logging.info(f"Retrieved {len(df)} records from GOLD_STUDENT_INSIGHTS")
print("Gold DataFrame columns:", df.columns.tolist())
print("Gold DataFrame head:\n", df.head())
# Plot 1: Depression Rate by Gender
plt.figure(figsize=(10, 6))
sns.barplot(data=df, x="gender", y="depression_rate", palette="viridis")
plt.title("Depression Rate by Gender")
plt.ylabel("Depression Rate")
plt.xlabel("Gender")
plt.savefig("example/depression_rate.png", bbox_inches='tight', dpi=300)
plt.close()
logging.info("Generated depression_rate.png")
# Fetch Silver data
silver_query = text("""
SELECT
CGPA,
ACADEMIC_PRESSURE,
GENDER,
AGE as STUDENT_AGE
FROM SILVER_STUDENT_DATA
LIMIT 1000
""")
with engine.connect() as conn:
silver_df = pd.read_sql(silver_query, conn)
silver_df.columns = silver_df.columns.str.lower()
logging.info(f"Retrieved {len(silver_df)} records from SILVER_STUDENT_DATA")
print("Silver DataFrame columns:", silver_df.columns.tolist())
print("Silver DataFrame head:\n", silver_df.head())
# Plot 2: CGPA vs Academic Pressure by Gender
plt.figure(figsize=(10, 6))
sns.scatterplot(
data=silver_df,
x="cgpa",
y="academic_pressure",
hue="gender",
alpha=0.6
)
plt.title("CGPA vs Academic Pressure by Gender")
plt.savefig("example/cgpa_pressure.png", bbox_inches='tight', dpi=300)
plt.close()
logging.info("Generated cgpa_pressure.png")
# Plot 3: Age Distribution
if 'student_age' in silver_df.columns:
plt.figure(figsize=(10, 6))
sns.histplot(data=silver_df, x="student_age", kde=True, bins=20)
plt.title("Age Distribution of Students")
plt.savefig("example/age_distribution.png", bbox_inches='tight', dpi=300)
plt.close()
logging.info("Generated age_distribution.png")
else:
logging.warning("student_age column not found in silver data")
logging.info("Visualizations generated successfully in example/ folder.")
except Exception as e:
logging.error(f"Visualization generation failed: {str(e)}")
raise
finally:
if engine:
engine.dispose()
logging.info("Snowflake engine disposed")
if __name__ == "__main__":
visualize_insights()