Data-visualization/data_visualization.py at main · GitProSolutions/Data-visualization · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import pandas as pd              # import Pandas library for data manipulation and analysis
import matplotlib.pyplot as plt  # import Matplotlib library for data visualization

# Load the data (replace 'filename.csv' with the name of your file)
df = pd.read_csv('filename.csv')

# Clean the data by removing any missing values or duplicates
df.dropna(inplace=True)         # remove any rows with missing values
df.drop_duplicates(inplace=True) # remove any duplicate rows

# Group the data by a column of your choice and calculate the average of another column
grouped_data = df.groupby('column_to_group_by').agg({'column_to_average': 'mean'})

# Calculate the standard deviation of the data
std_dev = df['column_to_average'].std()

# Plot the results (replace 'Title' and 'X Label' and 'Y Label' with your own labels)
fig, ax = plt.subplots()         # create a new figure and axis for the plot
grouped_data.plot(ax=ax)         # plot the data on the axis
ax.set_title('Title')            # set the title of the plot
ax.set_xlabel('X Label')         # set the label for the x-axis
ax.set_ylabel('Y Label')         # set the label for the y-axis

# Add a horizontal line to the plot to indicate the standard deviation
ax.axhline(y=std_dev, color='r', linestyle='--', label='Standard Deviation')
ax.legend()                      # display a legend for the plot

plt.show()                       # display the plot