Experiment cse skill
all files=
github
Experiment 1: Basic String Operations
# Experiment 1 driver: demonstrates basic string and arithmetic operations
# on values typed in by the user.
print("Basic Strings Operation in Python:")
word1 = input("Enter any word: ")
word2 = input("Enter any word: ")
print("Concatenation of word1 and word2: ")
print(word1 + " " + word2)
print(f"To capitalize all letter in word1: {word1.upper()} ")
print(f"To make all letter in word1 into lower case: {word1.lower()} ")
# A slice with step -1 walks the string backwards, i.e. prints word1 reversed.
print(f"Slicing in Python: {word1[::-1]}")
print(f"Number of times letter 'e' appeared in word1: {word1.count('e')}")
# str.index raises ValueError when the letter is absent; str.find returns -1
# instead, which lets us report the miss without crashing.
d_position = word2.find('d')
if d_position == -1:
    print("The letter 'd' does not appear in word2")
else:
    print(f"The position of letter 'd' in word2: {d_position}")

print("\nBasic Maths operations in Python: ")
a = int(input("Enter number 1: "))
b = int(input("Enter number 2: "))
print(f"Addition of {a} and {b}: {a + b}")
print(f"Subtraction of {a} and {b}: {a - b}")
print(f"Multiplication of {a} and {b}: {a * b}")
# Both division forms raise ZeroDivisionError for a zero divisor.
if b == 0:
    print(f"Division of {a} and {b}: undefined (division by zero)")
    print(f"Floor division of {a} and {b}: undefined (division by zero)")
else:
    print(f"Division of {a} and {b}: {a / b}")
    print(f"Floor division of {a} and {b}: {a // b}")
Experiment 2: Perform mathematical functions using Python
def ADD(a, b):
    """Return the sum ``a + b``."""
    return a + b
def SUB(a, b):
    """Return the difference ``a - b``."""
    return a - b
def MUL(a, b):
    """Return the product ``a * b``."""
    return a * b
def DIV(a, b):
    """Return the true (floating-point) quotient ``a / b``.

    Raises:
        ZeroDivisionError: if ``b`` is zero.
    """
    return a / b
def POW(a, b):
    """Return ``a`` raised to the power ``b`` (``a ** b``)."""
    return a ** b
def Square(a):
    """Return ``a`` multiplied by itself."""
    return a * a
def Factorial(a):
    """Return the product of the integers from 1 to ``a`` inclusive.

    For ``a`` below 1 the loop body never runs, so the result is 1.
    """
    fact = 1
    for i in range(1, a + 1):
        fact *= i
    return fact
def Fibonacci(x):
    """Print Fibonacci numbers separated by tabs; returns nothing.

    The two seed values 0 and 1 are always printed, followed by ``x``
    further terms, so ``x + 2`` numbers are written in total (just the
    seeds when ``x`` is 0 or negative). No trailing newline is written.
    """
    prev, curr = 0, 1
    print(prev, end="\t")
    print(curr, end="\t")
    for _ in range(x):
        # Advance the pair: the new term is the sum of the previous two.
        prev, curr = curr, prev + curr
        print(curr, end="\t")
# Experiment 2 driver: reads two integers and exercises the helper functions
# defined above.
a = int(input("Enter number 1: "))
b = int(input("Enter number 2: "))
print(f"Addition of {a} and {b} is {ADD(a, b)}")
print(f"Subtraction of {a} and {b} is {SUB(a, b)}")
print(f"Multiplication of {a} and {b} is {MUL(a, b)}")
# DIV uses true division, which raises ZeroDivisionError for a zero divisor;
# report that case instead of aborting the remaining demonstrations.
if b == 0:
    print(f"Division of {a} and {b} is undefined (division by zero)")
else:
    print(f"Division of {a} and {b} is {DIV(a, b)}")
print(f"Square of {a} is {Square(a)}")
print(f"Factorial of {a} is {Factorial(a)}")
x = int(input("Enter any number for fibonacci series: "))
print(f"Fibonacci series till {x}:")
Fibonacci(x)
Experiment 3: Program to read, write and modify text file data using object-oriented Python
import os
def create_file(filename):
    """Create (or overwrite) ``filename`` with a fixed one-line message.

    Prints a success message, or an error message if the file cannot be
    opened or written. Errors are reported, never raised.
    """
    try:
        with open(filename, 'w') as f:
            f.write('This is a new file created in python!\n')
        print("File " + filename + " created successfully.")
    except OSError:  # IOError is an alias of OSError in Python 3
        print("Error: could not create file " + filename)
def read_file(filename):
    """Print the entire contents of ``filename``.

    Prints an error message instead if the file cannot be opened or read.
    Errors are reported, never raised.
    """
    try:
        with open(filename, 'r') as f:
            contents = f.read()
        print(contents)
    except OSError:  # IOError is an alias of OSError in Python 3
        print("Error: could not read file " + filename)
def append_file(filename, text):
    """Append ``text`` to the end of ``filename``.

    The file is opened in append mode, so it is created if it does not
    exist. Prints a success message, or an error message if the file
    cannot be opened or written. Errors are reported, never raised.
    """
    try:
        with open(filename, 'a') as f:
            f.write(text)
        print("Text appended to file " + filename + " successfully.")
    except OSError:  # IOError is an alias of OSError in Python 3
        print("Error: could not append to file " + filename)
def rename_file(filename, new_filename):
    """Rename ``filename`` to ``new_filename`` using ``os.rename``.

    Prints a success message, or an error message if the rename fails
    (for example because the source does not exist). Errors are
    reported, never raised.
    """
    try:
        os.rename(filename, new_filename)
        print("File " + filename + " renamed to " + new_filename + " successfully.")
    except OSError:  # IOError is an alias of OSError in Python 3
        print("Error: could not rename file " + filename)
def delete_file(filename):
    """Delete ``filename`` using ``os.remove``.

    Prints a success message, or an error message if the file cannot be
    removed (for example because it does not exist). Errors are
    reported, never raised.
    """
    try:
        os.remove(filename)
        print("File " + filename + " deleted successfully.")
    except OSError:  # IOError is an alias of OSError in Python 3
        print("Error: could not delete file " + filename)
# Interactive menu for the file helpers above. Runs only when the file is
# executed as a script (the dunder names need double underscores).
if __name__ == "__main__":
    print("File handling in Python")
    while True:
        print("\n1. Create a file\n2. Read a file\n3. Rename a file\n4. Delete a file\n5. Append in the file\n6. Exit\n")
        try:
            choice = int(input("Enter your choice: "))
        except ValueError:
            # Non-numeric input: show the menu again instead of crashing.
            print("Invalid choice: please enter a number from 1 to 6.")
            continue
        if choice == 1:
            f_name = input("Enter the filename: ")
            create_file(f_name)
        elif choice == 2:
            r_name = input("Enter name of the file: ")
            read_file(r_name)
        elif choice == 3:
            old_name = input("Enter the present name of the file: ")
            new_name = input("Enter new name of the file: ")
            rename_file(old_name, new_name)
        elif choice == 4:
            d_name = input("Enter the file name to be deleted: ")
            delete_file(d_name)
        elif choice == 5:
            f_name = input("Enter file name to append: ")
            text = input("Enter text to be appended: ")
            append_file(f_name, text)
        elif choice == 6:
            # Same effect as exit(0), without relying on the `site` module.
            raise SystemExit(0)
        else:
            print("Invalid choice: please enter a number from 1 to 6.")
Experiment 4: Implement various predefined libraries in Python: NLTK, SciPy, NumPy, Matplotlib, seaborn
# NumPy: element-wise arithmetic, reductions and transposition on 3x3 arrays.
import numpy as np

arr1 = np.array([[1, 2, 3],
                 [4, 5, 6],
                 [7, 8, 9]])
arr2 = np.array([[1, 2, 3],
                 [4, 5, 6],
                 [7, 8, 9]])

print("Adding 1 to every element in array:")
print(arr1 + 1)  # the scalar is broadcast to every element
print(f"\nData type of an array: {arr1.dtype}\n")
print("\nSum of array 1 and 2:")
print(arr1 + arr2)  # element-wise sum of two equally shaped arrays
print(f"\nSum of element of array 1: {np.sum(arr1)}")
print(f"\nSquare root of Array 1 elements: \n {np.sqrt(arr1)}")
Trans_arr = arr1.T
# The transposed array is arr1, so the label names array 1.
print(f"\nTranspose of Array 1 elements: \n{Trans_arr}")
# seaborn: line and bar plots drawn from a sales CSV.
import seaborn as sn
import pandas as pd
import matplotlib.pyplot as plt

# The plots below read the "months", "total_profit" and "facewash" columns.
df = pd.read_csv("company-sales.csv")

sn.lineplot(x="months", y="total_profit", data=df)
plt.title("Months by Total-profit")
plt.show()

sn.barplot(x="months", y="facewash", data=df)
plt.title("Months by facewash profit")
# Without this call the bar plot is built but never displayed when the
# code runs as a script.
plt.show()
# NLTK: word/sentence tokenisation and stop-word filtering.
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords

# The tokenizers and the stop-word list need data packages that are not
# bundled with NLTK. Download each one only when it is missing, so a fresh
# install works and later runs do not download again.
# NOTE(review): 'punkt_tab' is what recent NLTK releases look up for
# tokenisation while older ones use 'punkt' - confirm against the installed
# version.
for resource_path, package in (
    ("tokenizers/punkt", "punkt"),
    ("tokenizers/punkt_tab", "punkt_tab"),
    ("corpora/stopwords", "stopwords"),
):
    try:
        nltk.data.find(resource_path)
    except LookupError:
        nltk.download(package)

text = """NLTK is a powerful library for working with human language data.
It provides easy-to-use interfaces to over 50 corpora and lexical resources, such as WordNet."""

words = word_tokenize(text)
print("Word Tokens:", words)

sentences = sent_tokenize(text)
print("\nSentences:", sentences)

# A set gives constant-time membership tests; tokens are lower-cased before
# the test so capitalised stop words are filtered too.
stop_words = set(stopwords.words('english'))
filtered_words = [word for word in words if word.lower() not in stop_words]
print("\nFiltered Words (without stopwords):", filtered_words)
# SciPy: simple linear regression on salary data loaded with pandas.
import pandas as pd
from scipy.stats import linregress
import matplotlib.pyplot as plt

df = pd.read_csv("Salary_Data.csv")
X, y = df['year_experience'], df['Salary']

# linregress returns the fitted line plus goodness-of-fit statistics.
slope, intercept, r_value, p_value, std_err = linregress(X, y)
for label, value in (
    ("Slope", slope),
    ("Intercept", intercept),
    ("R-squared value", r_value**2),
):
    print(f"{label}: {value}")

# Observed points with the fitted line drawn on top.
fitted_line = slope * X + intercept
plt.scatter(X, y, label='Data points')
plt.plot(X, fitted_line, color='red', label='Linear regression')
plt.xlabel('year_experience')
plt.ylabel('salary')
plt.legend()
plt.show()
link for data files:
csv file 4
Experiment 5: different functionalities of matplotlib package
# Experiment 5: pie, horizontal-bar and bar charts built from a student
# marksheet CSV with matplotlib.
import pandas as pd
import matplotlib.pyplot as plt

# The code below reads the "Section", "Percentage", "Name" and "Maths" columns.
df = pd.read_csv("student_marksheet_new.csv")

# One sub-frame per section.
condition_a = df['Section'].isin(['A'])
df_a = df.loc[condition_a]
condition_b = df['Section'].isin(['B'])
df_b = df.loc[condition_b]
condition_c = df['Section'].isin(['C'])
df_c = df.loc[condition_c]

# Pie chart: average percentage per section.
avg_percentage_of_a = df_a["Percentage"].mean()
avg_percentage_of_b = df_b["Percentage"].mean()
avg_percentage_of_c = df_c["Percentage"].mean()
section_per = [avg_percentage_of_a, avg_percentage_of_b, avg_percentage_of_c]
labels = ['A', 'B', 'C']
plt.pie(section_per, labels=labels, autopct='%1.1f%%')
plt.title('Average Percentage by Section')
plt.show()

# Horizontal bars: students ranked by percentage, best at the top
# (the y axis is inverted because barh draws the first row at the bottom).
df_sorted = df.sort_values(by='Percentage', ascending=False)
plt.barh(df_sorted['Name'], df_sorted['Percentage'])
plt.xlabel('Percentage')
plt.ylabel('Name')
plt.title('Rank Graph of Students')
plt.gca().invert_yaxis()
plt.show()

# Bar chart: maximum percentage per section. All three maxima are left
# untruncated so the sections are compared on the same footing.
max_a = max(df_a["Percentage"])
max_b = max(df_b["Percentage"])
max_c = max(df_c["Percentage"])
section_names = ['Section A', 'Section B', 'Section C']
max_percentages = [max_a, max_b, max_c]
plt.bar(section_names, max_percentages)
plt.xlabel("Section")
plt.ylabel("Maximum Percentage")
plt.title("Maximum Percentage by Section")
plt.show()

# Horizontal bars: minimum Maths mark per section.
min_a = min(df_a["Maths"])
min_b = min(df_b["Maths"])
min_c = min(df_c["Maths"])
min_maths_scores = [min_a, min_b, min_c]
sec_names = ["A", "B", "C"]
plt.barh(sec_names, min_maths_scores)
plt.xlabel("Maths marks")
plt.ylabel("Section")
plt.title("Minimum Maths Marks of Each Section")
plt.show()
link for data files:
csv file 5
Experiment 6: program to measure central tendency and dispersion of given data
# Experiment 6: measures of central tendency and dispersion on a student
# marksheet CSV.
import pandas as pd
import numpy as np

# `display` is predefined only inside IPython/Jupyter sessions; import it
# explicitly and fall back to print so the script also runs under plain Python.
try:
    from IPython.display import display
except ImportError:
    display = print

df = pd.read_csv("student_marksheet_new.csv")
display(df.head(5))
print()

# Mean: average percentage of section C.
condition_c = df['Section'].isin(['C'])
df_c = df.loc[condition_c]
avg_percentage_of_c = df_c["Percentage"].mean()
print(f"Average percentage of Section C: {avg_percentage_of_c}")

# Median: percentage across all sections.
median = df["Percentage"].median()
print(f"Median of Total Percentage of all sections: {median}")

# Mode: Science marks in section A. mode() returns a Series because there may
# be ties; the first entry is reported.
condition_A = df['Section'].isin(['A'])
df_a = df.loc[condition_A]
mode_of_science_of_section_A = df_a["Science"].mode()
print(f"Mode of marks of Science in Section A: {mode_of_science_of_section_A.iloc[0]}")

# Range: spread between the highest and lowest total marks.
total_marks_max = max(df['Total'])
total_marks_min = min(df['Total'])
print(f"Range of Total marks of all section: {total_marks_max - total_marks_min}")

# Variance and standard deviation for section A.
# NOTE(review): np.var / np.std default to ddof=0 (population statistics) -
# confirm that is the intended convention.
maths_sec_a = df_a["Maths"]
variance = np.var(maths_sec_a)
print(f"Variance of maths marks: {variance}")
history_sec_a = df_a["History"]
standard_deviation = np.std(history_sec_a)
# The printed statistic is a standard deviation, so the label says so.
print(f"Standard deviation of History marks of section A: {standard_deviation}")
link for data files:
csv file 6
Experiment 7: program to display different types of distributions
# Experiment 7: Shapiro-Wilk normality test on company profit figures,
# shown next to a histogram of the data.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import shapiro

df2 = pd.read_csv("CompanyABCProfit.csv")
# Give the profit column a name that is easier to type.
df2.rename(columns={"Profit(Rs '000)": "profit"}, inplace=True)
df2.head()

profit = df2['profit']
stat2, p2 = shapiro(profit)
# A p-value above 0.05 is reported as "normally distributed".
s2 = 'It is normally distributed.' if p2 > 0.05 else 'It is not normally distributed.'

plt.figure(figsize=(20, 6))
plt.subplot(122)  # right-hand slot of a 1x2 grid
sns.histplot(profit, kde=True, color='green')
# Dashed vertical line at the sample mean.
plt.axvline(profit.mean(), color='k', linestyle='dashed', linewidth=2)
plt.title(" , ".join(["Stats:" + str(stat2), "P value:" + str(p2), s2]))
plt.show()
link for data files:
csv file 7
Experiment 8: Program to implement linear and multiple regression
# Linear regression: salary predicted from a single feature with scikit-learn.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# `display` is predefined only inside IPython/Jupyter sessions; import it
# explicitly and fall back to print so the script also runs under plain Python.
try:
    from IPython.display import display
except ImportError:
    display = print

dataset = pd.read_csv('Salary_Data.csv')
display(dataset.head())
X = dataset.iloc[:, :-1].values  # independent variable array (all but the last column)
y = dataset.iloc[:, 1].values  # dependent variable vector (second column)

# Hold out one third of the rows for testing; fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1/3, random_state=0)

regressor = LinearRegression()
regressor.fit(X_train, y_train)  # fits the linear equation to the training data

# Predict the test set and show predictions next to the true values. Bare
# expressions are printed only by a notebook, and only as a cell's last line.
y_pred = regressor.predict(X_test)
print("Predicted salaries:", y_pred)
print("Actual salaries:", y_test)

# Training set: observed points and the fitted line.
plt.scatter(X_train, y_train, color='red')
plt.plot(X_train, regressor.predict(X_train), color='blue')
plt.title("Salary vs Experience (Training set)")
plt.xlabel("Years of experience")
plt.ylabel("Salaries")
plt.show()

# Test set: held-out points against the same fitted line (the line does not
# depend on which X values are used to draw it).
plt.scatter(X_test, y_test, color='red')
plt.plot(X_train, regressor.predict(X_train), color='blue')
plt.title("Salary vs Experience (Testing set)")
plt.xlabel("Years of experience")
plt.ylabel("Salaries")
plt.show()
# Multiple regression: the last column predicted from all the others, one of
# which (column 3) is categorical.
import pandas as pd
from sklearn.compose import ColumnTransformer
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Load dataset
# NOTE(review): absolute, machine-specific path; the other experiments load
# their CSV files by relative name.
dataset = pd.read_csv(r'D:\Notes\pythonProject\50_Startups.csv')

# Independent variables (all but the last column) and dependent variable
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

# Encode categorical data (column 3)
labelencoder = LabelEncoder()
X[:, 3] = labelencoder.fit_transform(X[:, 3])
# sparse_threshold=0 forces a dense result so it can be sliced and converted
# to float below.
onehotencoder = ColumnTransformer(
    [("Spend", OneHotEncoder(), [3])],
    remainder='passthrough',
    sparse_threshold=0,
)
X = onehotencoder.fit_transform(X)

# Avoiding the Dummy Variable Trap: drop the first one-hot column
X = np.asarray(X[:, 1:], dtype=float)

# ColumnTransformer puts the encoded columns FIRST and the passthrough
# columns after them, so the numeric features are the trailing columns.
n_passthrough = dataset.shape[1] - 2  # all columns minus the target and the categorical one
first_numeric = X.shape[1] - n_passthrough
rd_idx = first_numeric         # first passthrough column (labelled 'R&D Spend' below)
admin_idx = first_numeric + 1  # second passthrough column (labelled 'Administration Spend')

# Splitting the dataset into training and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Fitting Multiple Linear Regression to the Training set
regressor = LinearRegression()
regressor.fit(X_train, y_train)

# Predicting the Test set results
y_pred = regressor.predict(X_test)

# Evaluating the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'Mean Squared Error: {mse:.2f}')
print(f'Coefficient of Determination: {r2:.2f}')

# Plotting the results in 3D
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# Scatter plot of actual values against the two numeric features named on
# the axes (not the one-hot dummy columns at indices 0 and 1).
ax.scatter(X_test[:, rd_idx], X_test[:, admin_idx], y_test, color='red', label='Actual')

# Meshgrid for the prediction surface. A fixed number of samples per axis
# keeps the grid small whatever the magnitude of the feature values.
x0, x1 = np.meshgrid(
    np.linspace(X_test[:, rd_idx].min(), X_test[:, rd_idx].max(), 30),
    np.linspace(X_test[:, admin_idx].min(), X_test[:, admin_idx].max(), 30),
)

# Predicting values for the grid: vary the two plotted features and hold
# every other feature at its test-set mean, so the surface comes from the
# full fitted model.
grid_features = np.tile(X_test.mean(axis=0), (x0.size, 1))
grid_features[:, rd_idx] = x0.ravel()
grid_features[:, admin_idx] = x1.ravel()
y_pred_grid = regressor.predict(grid_features).reshape(x0.shape)

# Plotting the prediction surface
ax.plot_surface(x0, x1, y_pred_grid, color='blue', alpha=0.5)

# Adding labels, title, and legend to the plot
ax.set_xlabel('R&D Spend')
ax.set_ylabel('Administration Spend')
ax.set_zlabel('Profit')
ax.set_title('Multiple Regression Plot')

# Manually adding legend: proxy artists stand in for the scatter and surface.
red_patch = plt.Line2D([0], [0], linestyle="none", marker='o', color='red', label='Actual')
blue_patch = plt.Line2D([0], [0], linestyle="none", marker='s', color='blue', label='Predicted')
ax.legend(handles=[red_patch, blue_patch])
plt.show()
link for data files:
csv file 8
Experiment 9: program to implement clustering and segmentation
# Experiment 9: K-Means customer segmentation on two columns of a customer CSV.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
import seaborn as sns

customers_df = pd.read_csv(r'D:\Notes\pythonProject\Mall_Customers.csv')
X = customers_df[['Annual Income', 'Spending Score']]

# Elbow method: fit one model per candidate K and record its inertia.
cluster_counts = range(1, 11)
inertia = [
    KMeans(n_clusters=k, random_state=42).fit(X).inertia_
    for k in cluster_counts
]

plt.figure(figsize=(8, 5))
plt.plot(cluster_counts, inertia, marker='o')
plt.xlabel('Number of Clusters (K)')
plt.ylabel('Inertia')
plt.title('Elbow Method for Optimal K')
plt.show()

# Final model with K=5; each row's cluster label is stored next to the data.
kmeans = KMeans(n_clusters=5, random_state=42)
customers_df['Cluster'] = kmeans.fit_predict(X)

plt.figure(figsize=(10, 6))
sns.scatterplot(
    x='Annual Income',
    y='Spending Score',
    hue='Cluster',
    data=customers_df,
    palette='Set1',
)
plt.title('Customer Segmentation')
plt.xlabel('Annual Income')
plt.ylabel('Spending Score')
plt.show()
link for data files:
csv file 9