Install the required libraries using pip (openpyxl is needed for the Excel export at the end):
pip install pandas matplotlib seaborn openpyxl
import pandas as pd # For data manipulation
import matplotlib.pyplot as plt # For basic plotting
import seaborn as sns # For advanced statistical visualizations
# Load the dataset from a CSV file into the DataFrame used by every later step.
df = pd.read_csv(filepath_or_buffer='your_dataset.csv')
If your data comes from another source (Excel, SQL, etc.), modify the loading code accordingly, for example with pd.read_excel or pd.read_sql.
If you do not have your own data, you can use a sample dataset instead.
# Quick overview of the data. Each result is printed explicitly: a bare
# expression such as `df.head()` only displays something in an interactive
# session and is silently discarded when this file runs as a script.
print(df.head())          # first five rows
print(df.describe())      # summary statistics produced by DataFrame.describe()
print(df.isnull().sum())  # number of missing values per column
print(df.dtypes)          # data type of each column
# Impute missing values in 'column_name' with the column mean.
# The result is assigned back to the column: calling fillna(inplace=True) on
# the selection df['column_name'] is a chained in-place operation, which is
# deprecated and does not update `df` under pandas Copy-on-Write.
df['column_name'] = df['column_name'].fillna(df['column_name'].mean())

# Build the cleaned frame: drop rows that still contain missing values, then
# drop exact duplicate rows. The two steps are chained so that the
# drop_duplicates() result no longer overwrites (and loses) the dropna() result.
df_clean = df.dropna().drop_duplicates()
# Line plot of 'value_column' against 'date_column'.
# NOTE(review): the date column is plotted exactly as loaded; if it was read
# as plain strings the x-axis will be categorical -- confirm whether it should
# be parsed to datetimes first.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df['date_column'], df['value_column'])
ax.set_title('Line Plot of Values Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Value')
plt.show()
# Bar plot of 'value_column' for each 'category_column' level, using
# seaborn's default estimator for the bar heights.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=df, x='category_column', y='value_column', ax=ax)
ax.set_title('Bar Plot of Categories vs Values')
ax.set_xlabel('Category')
ax.set_ylabel('Value')
plt.show()
# Histogram of 'numeric_column' with 20 bins, drawn in semi-transparent blue.
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(df['numeric_column'], bins=20, color='blue', alpha=0.7)
ax.set_title('Histogram of Numeric Column')
ax.set_xlabel('Values')
ax.set_ylabel('Frequency')
plt.show()
# Box plot showing the distribution of 'value_column' within each category.
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=df, x='category_column', y='value_column', ax=ax)
ax.set_title('Box Plot of Values by Category')
ax.set_xlabel('Category')
ax.set_ylabel('Value')
plt.show()
# Scatter plot of two numeric columns against each other.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(df['numeric_column_1'], df['numeric_column_2'])
ax.set_title('Scatter Plot of Numeric Column 1 vs Numeric Column 2')
ax.set_xlabel('Numeric Column 1')
ax.set_ylabel('Numeric Column 2')
plt.show()
# Grid of pairwise plots for the three selected numeric columns.
# pairplot creates its own figure, so no plt.figure() call is needed here.
pair_columns = ['numeric_column_1', 'numeric_column_2', 'numeric_column_3']
sns.pairplot(data=df[pair_columns])
plt.show()
# Heatmap of pairwise correlations between the numeric columns.
# Correlation is only computed on numeric columns: on pandas >= 2.0,
# calling df.corr() on a frame that also holds string/category columns
# raises a ValueError, so the non-numeric columns are filtered out first.
numeric_df = df.select_dtypes(include='number')
correlation_matrix = numeric_df.corr()

plt.figure(figsize=(10, 6))
# annot=True writes each coefficient inside its cell.
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
plt.title('Correlation Heatmap')
plt.show()
# Mean of 'value_column' within each 'category_column' group.
by_category = df.groupby('category_column')
grouped_data = by_category['value_column'].mean()
print(grouped_data)
# Several aggregates (mean, sum, count) of 'value_column' per category.
# The column -> list-of-functions mapping is passed to agg() unchanged.
summary_spec = {'value_column': ['mean', 'sum', 'count']}
aggregated_data = df.groupby('category_column').agg(summary_spec)
print(aggregated_data)
# Pivot table: mean of 'value_column' with 'category_column' as rows and
# 'another_category_column' as columns.
pivot = pd.pivot_table(
    df,
    values='value_column',
    index='category_column',
    columns='another_category_column',
    aggfunc='mean',
)
print(pivot)
# Frequency table counting how often each pair of category values co-occurs.
crosstab = pd.crosstab(
    index=df['category_column_1'],
    columns=df['category_column_2'],
)
print(crosstab)
# Correlation coefficient between two numeric columns (Series.corr default method).
first_series = df['numeric_column_1']
second_series = df['numeric_column_2']
correlation = first_series.corr(second_series)
print(f'Correlation: {correlation}')
# Violin plot of 'numeric_column' for each category; axis labels are left
# to seaborn's defaults.
fig, ax = plt.subplots(figsize=(10, 6))
sns.violinplot(data=df, x='category_column', y='numeric_column', ax=ax)
ax.set_title('Violin Plot of Numeric Column by Category')
plt.show()
# Count plot: number of rows observed for each 'category_column' value.
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(data=df, x='category_column', ax=ax)
ax.set_title('Count Plot of Categories')
plt.show()
# One histogram of 'numeric_column' per 'category_column' value, laid out
# as a row of facets (one column of the grid per category).
g = sns.FacetGrid(data=df, col='category_column', height=5)
hist_options = {'bins': 20}
g.map(plt.hist, 'numeric_column', **hist_options)
plt.show()
# Draw a seaborn scatter plot and write it to 'scatter_plot.png' instead of
# showing it on screen.
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(data=df, x='numeric_column_1', y='numeric_column_2', ax=ax)
ax.set_title('Scatter Plot Example')
fig.savefig('scatter_plot.png')
# Write the cleaned frame to CSV and Excel files; index=False leaves the
# row index out of both outputs.
df_clean.to_csv(path_or_buf='processed_data.csv', index=False)
df_clean.to_excel(excel_writer='processed_data.xlsx', index=False)