# HP3
# ---------- PLOT 1: LINE PLOT ----------
import matplotlib.pyplot as plt
# Take input from the user: two space-separated lists of numbers.
x = list(map(float, input("Enter x values separated by spaces: ").split()))
y = list(map(float, input("Enter y values separated by spaces: ").split()))

# Check if x and y are of the same length; plt.plot needs one y per x.
if len(x) != len(y):
    print("Error: x and y must have the same number of elements.")
else:
    # Create the line plot
    plt.plot(x, y)

    # Set the title and axis labels
    plt.title("Line Plot from User Input")
    plt.xlabel("X Values")
    plt.ylabel("Y Values")

    # Display the plot
    plt.show()
# ---------- BAR PLOT ----------
import matplotlib.pyplot as plt
# Take input from the user: labels and their values, both space-separated.
categories = input("Enter category labels separated by spaces: ").split()
values = list(map(float, input("Enter corresponding values separated by spaces: ").split()))

# Optional: Define colors (reuse or generate dynamically)
colors = ['red', 'green', 'blue', 'orange', 'purple', 'cyan', 'magenta', 'yellow']
colors = colors[:len(categories)]  # Trim to match number of categories

# Check if input lengths match
if len(categories) != len(values):
    print("Error: Number of categories and values must match.")
else:
    # Create the bar plot
    plt.bar(categories, values, color=colors)

    # Set the title and axis labels
    plt.title("Bar Plot from User Input")
    plt.xlabel("Categories")
    plt.ylabel("Values")

    # Display the plot
    plt.show()
# ---------- SCATTER PLOT ----------
import matplotlib.pyplot as plt
# Get number of points from the user
n = int(input("Enter the number of data points: "))

# Initialize empty lists; one entry per data point is appended below.
x = []
y = []
sizes = []
colors = []

# Collect data from the user
for i in range(n):
    print(f"\nData Point {i+1}:")
    x_val = float(input(" Enter x value: "))
    y_val = float(input(" Enter y value: "))
    size = int(input(" Enter size of the point (e.g., 50): "))
    color = input(" Enter color of the point (e.g., red, #00FF00): ")
    x.append(x_val)
    y.append(y_val)
    sizes.append(size)
    colors.append(color)

# Create the scatter plot
plt.scatter(x, y, s=sizes, c=colors, alpha=0.7, edgecolors='black')

# Set the title and axis labels
plt.title("Scatter Plot Example")
plt.xlabel("X Values")
plt.ylabel("Y Values")

# Display the plot
plt.show()
# ---------- HISTOGRAM / BOXPLOT / PIE CHART MENU ----------
import matplotlib.pyplot as plt
def plot_histogram():
    """Prompt for numeric values and a bin count, then show a histogram.

    Reads a space-separated list of numbers and an integer number of bins
    from standard input. A ValueError raised while parsing the input or
    drawing the plot is reported with a message instead of propagating.
    """
    try:
        data = list(map(float, input("Enter numeric values separated by spaces: ").split()))
        bins = int(input("Enter number of bins: "))

        plt.hist(data, bins=bins, color='skyblue', edgecolor='black')
        plt.title("Histogram Example")
        plt.xlabel("Values")
        plt.ylabel("Frequency")
        plt.grid(True)
        plt.show()
    except ValueError:
        print("Invalid input. Please enter numeric values.")
def plot_boxplot():
    """Prompt for one or more datasets and show them as side-by-side boxplots.

    Reads the number of datasets, then one space-separated line of numbers
    per dataset, from standard input. A ValueError raised while parsing the
    input or drawing the plot is reported with a message instead of
    propagating.
    """
    try:
        n = int(input("Enter the number of datasets: "))
        datasets = []
        for i in range(n):
            data = list(map(float, input(f"Enter numeric values for dataset {i+1}, separated by spaces: ").split()))
            datasets.append(data)

        plt.boxplot(datasets)
        plt.title("Boxplot Example")
        plt.grid(True)
        plt.show()
    except ValueError:
        print("Invalid input. Please enter numeric values.")
def plot_piechart():
    """Prompt for category names and values, then show a pie chart.

    Category names are read as one comma-separated line and values as one
    space-separated line. If the two lists differ in length a message is
    printed and nothing is plotted. A ValueError raised while parsing the
    values or drawing the chart is reported with a message instead of
    propagating.
    """
    try:
        raw_categories = input("Enter category names separated by commas: ").split(',')
        # Strip surrounding whitespace so "a, b" yields the label "b", not " b".
        categories = [name.strip() for name in raw_categories]
        values = list(map(float, input("Enter corresponding numeric values separated by spaces: ").split()))

        if len(categories) != len(values):
            print("The number of categories and values must be the same.")
            return

        # Highlight the first slice by offsetting it from the centre.
        explode = [0.1 if i == 0 else 0 for i in range(len(categories))]

        plt.pie(values, labels=categories, explode=explode, autopct='%1.1f%%', startangle=140)
        plt.title("Pie Chart Example")
        plt.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
        plt.show()
    except ValueError:
        print("Invalid input. Please ensure numeric values are entered correctly.")
def main():
    """Run the interactive graph menu until the user chooses to exit.

    Repeatedly prints the menu, reads a choice from standard input and
    dispatches to the matching plot function. Choice '4' ends the loop; any
    other unrecognised input prints a hint and shows the menu again.
    """
    while True:
        print("\n--- Graph Generator Menu ---")
        print("1. Histogram")
        print("2. Boxplot")
        print("3. Pie Chart")
        print("4. Exit")

        choice = input("Enter your choice (1-4): ")

        if choice == '1':
            plot_histogram()
        elif choice == '2':
            plot_boxplot()
        elif choice == '3':
            plot_piechart()
        elif choice == '4':
            print("Exiting program. Goodbye!")
            break
        else:
            print("Invalid choice. Please enter a number between 1 and 4.")
# Start the menu only when this file is executed as a script, not when it
# is imported. The dunder names are required: a bare `name` is undefined.
if __name__ == "__main__":
    main()
# ---------- UNIVARIATE ANALYSIS ----------
import statistics
import matplotlib.pyplot as plt
import seaborn as sns
# Input: list of numeric values from the user
data_input = input("Enter numbers separated by commas: ")
data = list(map(float, data_input.split(',')))

# statistics.variance() and statistics.stdev() require at least two values,
# so stop with a clear message instead of an unhandled StatisticsError.
if len(data) < 2:
    raise SystemExit("Error: please enter at least two numbers.")

# Measures of Central Tendency
mean = statistics.mean(data)
median = statistics.median(data)
# statistics.mode() returns the first of several equally common values on
# Python 3.8+ rather than raising, so ties are detected explicitly here.
modes = statistics.multimode(data)
mode = modes[0] if len(modes) == 1 else "No unique mode"

# Measures of Dispersion
variance = statistics.variance(data)
std_dev = statistics.stdev(data)
data_min = min(data)
data_max = max(data)
data_range = data_max - data_min

# Display results
print("\n--- Univariate Analysis ---")
print(f"Mean: {mean}")
print(f"Median: {median}")
print(f"Mode: {mode}")
print(f"Variance: {variance}")
print(f"Standard Deviation: {std_dev}")
print(f"Range: {data_range} (Max: {data_max} - Min: {data_min})")

# Histogram (left panel of a 1x2 figure)
plt.figure(figsize=(10, 4))
plt.subplot(1, 2, 1)
plt.hist(data, bins=10, color='skyblue', edgecolor='black')
plt.title('Histogram')
plt.xlabel('Value')
plt.ylabel('Frequency')

# Boxplot (right panel)
plt.subplot(1, 2, 2)
sns.boxplot(data, color='lightgreen')
plt.title('Boxplot')

plt.tight_layout()
plt.show()
# ---------- BIVARIATE ANALYSIS ----------
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Sample dataset (can be replaced by user input or CSV import).
# Two categorical columns of equal length: 'X' and 'Y'.
data = {
    'X': ['A', 'A', 'A', 'B', 'B', 'C', 'C', 'C', 'C', 'B', 'A', 'C', 'B', 'A', 'B', 'B'],
    'Y': ['Yes', 'No', 'Yes', 'No', 'Yes', 'No', 'Yes', 'Yes', 'No', 'Yes', 'No', 'Yes', 'No', 'Yes', 'No', 'Yes'],
}

# Convert to DataFrame
df = pd.DataFrame(data)
# ------------------ Marginal Frequencies ------------------
# Marginal frequency of X: count of each X category, ordered by label.
marginal_x = df['X'].value_counts().sort_index()

# Marginal frequency of Y: count of each Y category, ordered by label.
marginal_y = df['Y'].value_counts().sort_index()

# Plot marginal frequencies side by side in a 1x2 figure.
plt.figure(figsize=(12, 5))

plt.subplot(1, 2, 1)
marginal_x.plot(kind='bar', color='skyblue')
plt.title('Marginal Frequency of X')
plt.xlabel('X')
plt.ylabel('Frequency')

plt.subplot(1, 2, 2)
marginal_y.plot(kind='bar', color='salmon')
plt.title('Marginal Frequency of Y')
plt.xlabel('Y')
plt.ylabel('Frequency')

plt.tight_layout()
plt.show()
# ------------------ Conditional Frequencies ------------------
# Conditional frequency of Y given X: normalize='index' turns each X row
# of the cross-tabulation into proportions.
conditional_freq = pd.crosstab(df['X'], df['Y'], normalize='index')

# Plot grouped bar chart
conditional_freq.plot(kind='bar', stacked=False, figsize=(8, 6), colormap='Set2')
plt.title('Conditional Frequency of Y given X (Grouped Bar)')
plt.ylabel('Proportion')
plt.xlabel('X')
plt.legend(title='Y')
plt.tight_layout()
plt.show()

# Plot heatmap of conditional frequencies
plt.figure(figsize=(6, 4))
sns.heatmap(conditional_freq, annot=True, cmap='coolwarm', fmt=".2f")
plt.title('Conditional Frequency Heatmap (Y | X)')
plt.ylabel('X')
plt.xlabel('Y')
plt.tight_layout()
plt.show()
# ---------- CORRELATION ----------
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import linregress, pearsonr
from sklearn.datasets import load_iris
# Load Iris dataset
iris = load_iris()
data = iris.data
feature_names = iris.feature_names
target = iris.target

# Display feature options
print("Available Features:")
for i, name in enumerate(feature_names):
    print(f"{i}: {name}")

# Take user input for X and Y axis features
try:
    x_index = int(input("Enter the index (0-3) for the X-axis feature: "))
    y_index = int(input("Enter the index (0-3) for the Y-axis feature: "))
    if x_index == y_index:
        raise ValueError("X and Y features must be different.")
    if not (0 <= x_index <= 3) or not (0 <= y_index <= 3):
        raise ValueError("Index must be between 0 and 3.")
except ValueError as e:
    print(f"Invalid input: {e}")
    # exit() is the interactive helper from `site` and reports success;
    # raise SystemExit with a non-zero status so the failure is visible.
    raise SystemExit(1)

# Extract data for selected features (one column each)
x = data[:, x_index]
y = data[:, y_index]
x_label = feature_names[x_index]
y_label = feature_names[y_index]
# Calculate correlation coefficient (the p-value is not used)
corr_coeff, _ = pearsonr(x, y)

# Perform linear regression for line of best fit
slope, intercept, r_value, p_value, std_err = linregress(x, y)
line = slope * x + intercept

# Create scatter plot, coloured by class label
plt.figure(figsize=(10, 6))
scatter = plt.scatter(x, y, c=target, s=80, cmap='plasma', alpha=0.7, edgecolors='k', label='Data Points')
plt.plot(x, line, color='red', linewidth=2, label='Line of Best Fit')

# Labels and title
plt.title(f'Scatter Plot: {x_label} vs {y_label}', fontsize=14)
# The Iris feature names already end in "(cm)", so the unit is not
# appended again (it previously rendered as "... (cm) (cm)").
plt.xlabel(x_label, fontsize=12)
plt.ylabel(y_label, fontsize=12)

# Show correlation coefficient near the top-left corner of the data
plt.text(min(x)+0.2, max(y)-0.1, f'Correlation Coefficient: {corr_coeff:.2f}', fontsize=12, color='blue')

# Legend and display
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()
# Interpretation: report only the sign of the coefficient. The final
# branch is reached when the coefficient is neither above nor below zero.
print("\nINTERPRETATION:")
if corr_coeff > 0:
    print("There is a POSITIVE CORRELATION between the selected features.")
    print("As the X feature increases, the Y feature tends to increase.")
elif corr_coeff < 0:
    print("There is a NEGATIVE CORRELATION between the selected features.")
    print("As the X feature increases, the Y feature tends to decrease.")
else:
    print("There is NO CORRELATION between the selected features.")