Experiment cse skill

all files=

github

Experiment 1: Basic string Operations

        
# Experiment 1 demo: reads two words and two integers from stdin and prints
# the result of a few basic string and arithmetic operations on them.
print("Basic Strings Operation in Python:")
word1 = input("Enter any word: ")
word2 = input("Enter any word: ")

print("Concatenation of word1 and word2: ")
print(word1+" "+word2)

print(f"To capitalize all letter in word1: {word1.upper()} ")
print(f"To make all letter in word1 into lower case: {word1.lower()} ")
# A slice with step -1 walks the string backwards, i.e. prints word1 reversed.
print(f"Slicing in Python: {word1[::-1]}")
print(f"Number of times letter 'e' appeared in word1: {word1.count('e')}")
# str.index() raises ValueError when the letter is missing; str.find()
# returns -1 instead, so the demo keeps running for any input word.
d_position = word2.find('d')
if d_position == -1:
    print("The letter 'd' does not appear in word2")
else:
    print(f"The position of letter 'd' in word2: {d_position}")


print("\nBasic Maths operations in Python: ")
a = int(input("Enter number 1: "))
b = int(input("Enter number 2: "))
print(f"Addition of {a} and {b}: {a+b}")
print(f"Subtraction of {a} and {b}: {a-b}")
print(f"Multiplication of {a} and {b}: {a*b}")
if b == 0:
    # Both / and // raise ZeroDivisionError for a zero divisor.
    print(f"Division  of {a} and {b}: undefined (cannot divide by zero)")
    print(f"Floor division of {a} and {b}: undefined (cannot divide by zero)")
else:
    print(f"Division  of {a} and {b}: {a/b}")
    print(f"Floor division of {a} and {b}: {a//b}")
    
    

Experiment 2: Perform mathematical functions using Python

    
    def ADD(a, b):
    return a + b

def SUB(a, b):
    """Return the difference ``a - b``."""
    difference = a - b
    return difference

def MUL(a, b):
    """Return the product ``a * b``."""
    product = a * b
    return product

def DIV(a, b):
    """Return the true-division quotient ``a / b``.

    A zero ``b`` propagates the ZeroDivisionError raised by ``/``.
    """
    quotient = a / b
    return quotient

def POW(a, b):
    """Return ``a`` raised to the power ``b``."""
    # Two-argument pow() is the function form of the ** operator.
    return pow(a, b)

def Square(a):
    """Return ``a`` multiplied by itself."""
    squared = a * a
    return squared

def Factorial(a):
    """Return the product of the integers from 1 to ``a`` inclusive.

    For ``a`` less than 1 the range is empty and the result is 1.
    """
    product = 1
    # Counting down from a to 1 multiplies the same integer factors.
    for factor in range(a, 0, -1):
        product *= factor
    return product

def Fibonacci(x):
    """Print Fibonacci numbers separated by tabs.

    The seeds 0 and 1 are always printed, followed by ``x`` further terms.
    Every value is followed by a tab and no newline is written.
    Returns None.
    """
    previous, current = 0, 1
    print(previous, end="\t")
    print(current, end="\t")
    for _ in range(x):
        # Advance the pair one step; the new `current` is the next term.
        previous, current = current, previous + current
        print(current, end="\t")


# Driver for Experiment 2: reads two integers, prints the result of each
# arithmetic helper, then prints a Fibonacci series for a third integer.
a = int(input("Enter number 1: "))
b = int(input("Enter number 2: "))

print(f"Addition of {a} and {b} is {ADD(a, b)}")
print(f"Subtraction of {a} and {b} is {SUB(a, b)}")
print(f"Multiplication of {a} and {b} is {MUL(a, b)}")
if b == 0:
    # DIV(a, 0) would raise ZeroDivisionError and abort the rest of the demo.
    print(f"Division of {a} and {b} is undefined (cannot divide by zero)")
else:
    print(f"Division of {a} and {b} is {DIV(a, b)}")
print(f"Square of {a} is {Square(a)}")
print(f"Factorial of {a} is {Factorial(a)}")

x = int(input("Enter any number for fibonacci series: "))
print(f"Fibonacci series till {x}:")
Fibonacci(x)
  

Experiment 3: Program to read, write and modify text file data using object oriented python

        
        

import os


def create_file(filename):
    """Create (or overwrite) ``filename`` containing one fixed line of text.

    Prints a success message afterwards, or an error message when an
    IOError is raised.
    """
    try:
        with open(filename, 'w') as handle:
            handle.write('This is a new file created in python!\n')
        print("".join(("File ", filename, " created successfully.")))
    except IOError:
        print("".join(("Error: could not create file ", filename)))


def read_file(filename):
    """Print the entire contents of ``filename``.

    Prints an error message instead when an IOError is raised.
    """
    try:
        with open(filename, 'r') as source:
            print(source.read())
    except IOError:
        print("".join(("Error: could not read file ", filename)))


def append_file(filename, text):
    """Append ``text`` to the end of ``filename``.

    The file is opened in append mode. Prints a success message
    afterwards, or an error message when an IOError is raised.
    """
    try:
        with open(filename, 'a') as target:
            target.write(text)
        print("".join(("Text appended to file ", filename, " successfully.")))
    except IOError:
        print("".join(("Error: could not append to file ", filename)))


def rename_file(filename, new_filename):
    """Rename ``filename`` to ``new_filename`` using ``os.rename``.

    Prints a success message afterwards, or an error message when the
    rename raises an OS-level error.
    """
    try:
        os.rename(filename, new_filename)
        print("".join(("File ", filename, " renamed to ", new_filename, " successfully.")))
    except OSError:  # IOError is an alias of OSError in Python 3
        print("".join(("Error: could not rename file ", filename)))


def delete_file(filename):
    """Delete ``filename`` using ``os.remove``.

    Prints a success message afterwards, or an error message when the
    removal raises an OS-level error.
    """
    try:
        os.remove(filename)
        print("".join(("File ", filename, " deleted successfully.")))
    except OSError:  # IOError is an alias of OSError in Python 3
        print("".join(("Error: could not delete file ", filename)))


# Interactive menu for the file helpers above. It runs only when the file is
# executed as a script: __name__ is "__main__" then, and the module name when
# imported. The original single-underscore `_name_` was an undefined name.
if __name__ == "__main__":
    print("File handling in Python")
    while True:
        print("\n1. Create a file\n2. Read a file\n3. Rename a file\n4. Delete a file\n5. Append in the file\n6. Exit\n")
        try:
            choice = int(input("Enter your choice: "))
        except ValueError:
            # Non-numeric input: show the menu again instead of crashing.
            print("Invalid choice: please enter a number from 1 to 6.")
            continue
        if choice == 1:
            f_name = input("Enter the filename: ")
            create_file(f_name)
        elif choice == 2:
            r_name = input("Enter name of the file: ")
            read_file(r_name)
        elif choice == 3:
            old_name = input("Enter the present name of the file: ")
            new_name = input("Enter new name of the file: ")
            rename_file(old_name,new_name)
        elif choice == 4:
            d_name = input("Enter the file name to be deleted: ")
            delete_file(d_name)
        elif choice == 5:
            f_name = input("Enter file name to append: ")
            text = input("Enter text to be appended: ")
            append_file(f_name,text)
        elif choice == 6:
            # Same effect as exit(0), without relying on the site-provided helper.
            raise SystemExit(0)
        else:
            print("Invalid choice: please enter a number from 1 to 6.")
    

Experiment 4: Implement various predefined libraries in Python: NLTK, SciPy, NumPy, Matplotlib, Seaborn

#numpy    
import numpy as np

# NumPy demo: element-wise arithmetic, reductions and transpose on two
# identical 3x3 integer arrays.
arr1 = np.array([[1,2,3],
              [4,5,6],
              [7,8,9]])

arr2 = np.array([[1,2,3],
                [4,5,6],
                [7,8,9]])

print("Adding 1 to every element in array:")
print(arr1+1)
print(f"\nData type of an array: {arr1.dtype}\n")
print("\nSum of array 1 and 2:")
print(arr1+arr2)
print(f"\nSum of element of array 1: {np.sum(arr1)}")
print(f"\nSquare root of Array 1 elements: \n {np.sqrt(arr1)}")
# The label names Array 2, so transpose arr2 (the original transposed arr1,
# which only looked right because both arrays hold the same values).
Trans_arr = arr2.T
print(f"\nTranspose of Array 2 elements: \n{Trans_arr}")

#seaborn
import seaborn as sn
import pandas as pd
import matplotlib.pyplot as plt  

# Seaborn demo: plot monthly total profit as a line chart, then monthly
# facewash figures as a bar chart, from company-sales.csv.
df = pd.read_csv("company-sales.csv")
sn.lineplot(x="months", y="total_profit", data=df)
plt.title("Months by Total-profit")
plt.show()

sn.barplot(x="months",y="facewash", data=df)
plt.title("Months by facewash profit")
# The bar chart was built but never displayed; show it like the line chart.
plt.show()
        
#nltk
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords

# nltk.download('stopwords') 

# nltk.download('punkt') 

# NLTK demo: split a short passage into word and sentence tokens, then drop
# the English stopwords from the word tokens.
text = """NLTK is a powerful library for working with human language data. 
It provides easy-to-use interfaces to over 50 corpora and lexical resources, such as WordNet."""

words = word_tokenize(text)
print("Word Tokens:", words)

sentences = sent_tokenize(text)
print("\nSentences:", sentences)

# A set gives constant-time membership tests for the filter below.
stop_words = set(stopwords.words('english'))

# Keep a token only when its lower-cased form is not a stopword.
filtered_words = list(filter(lambda token: token.lower() not in stop_words, words))
print("\nFiltered Words (without stopwords):", filtered_words)


#scipy (linear regression with scipy.stats.linregress; data loaded with pandas)

import pandas as pd
from scipy.stats import linregress
import matplotlib.pyplot as plt

# Fit a straight line to salary vs. experience with scipy.stats.linregress
# and plot the data points together with the fitted line.
df = pd.read_csv("Salary_Data.csv")

X, y = df['year_experience'], df['Salary']

# linregress returns a 5-field result: slope, intercept, r, p and std error.
regression = linregress(X, y)
slope, intercept, r_value, p_value, std_err = regression

print(
    f"Slope: {slope}",
    f"Intercept: {intercept}",
    f"R-squared value: {r_value**2}",
    sep="\n",
)

fitted_line = slope * X + intercept
plt.scatter(X, y, label='Data points')
plt.plot(X, fitted_line, color='red', label='Linear regression')
plt.xlabel('year_experience')
plt.ylabel('salary')
plt.legend()
plt.show()

link for data files: 
    
csv file 4

Experiment 5: different functionalities of matplotlib package

        
import pandas as pd
import matplotlib.pyplot as plt

# Pie chart of the mean "Percentage" for sections A, B and C.
df = pd.read_csv("student_marksheet_new.csv")

# Boolean masks selecting the rows of each section.
condition_a = df['Section'] == 'A'
condition_b = df['Section'] == 'B'
condition_c = df['Section'] == 'C'

# Per-section sub-frames (also used by the later charts).
df_a = df[condition_a]
df_b = df[condition_b]
df_c = df[condition_c]

avg_percentage_of_a = df_a["Percentage"].mean()
avg_percentage_of_b = df_b["Percentage"].mean()
avg_percentage_of_c = df_c["Percentage"].mean()

labels = ['A','B','C']
section_per = [avg_percentage_of_a, avg_percentage_of_b, avg_percentage_of_c]

plt.pie(section_per, labels=labels, autopct='%1.1f%%')
plt.title('Average Percentage by Section')
plt.show()




# Horizontal bar chart of every student's percentage, highest first.
df_sorted = df.sort_values('Percentage', ascending=False)

student_names = df_sorted['Name']
student_percentages = df_sorted['Percentage']
plt.barh(student_names, student_percentages)
plt.xlabel('Percentage')
plt.ylabel('Name')
plt.title('Rank Graph of Students')
# barh draws the first row at the bottom; flip the axis so the first
# (highest-percentage) row appears at the top.
axes = plt.gca()
axes.invert_yaxis()
plt.show()




# Bar chart of the highest "Percentage" in each section.
# All three maxima are kept at full precision: the original truncated only
# section C with int(), which made the bars not directly comparable.
max_a = max(df_a["Percentage"])
max_b = max(df_b["Percentage"])
max_c = max(df_c["Percentage"])

section_names = ['Section A', 'Section B', 'Section C']
max_percentages = [max_a, max_b, max_c]

plt.bar(section_names, max_percentages)
plt.xlabel("Section")
plt.ylabel("Maximum Percentage")
plt.title("Maximum Percentage by Section")
plt.show()




# Horizontal bar chart of the lowest "Maths" mark in each section.
sec_names = ["A", "B", "C"]
min_maths_scores = [min(section["Maths"]) for section in (df_a, df_b, df_c)]
min_a, min_b, min_c = min_maths_scores

plt.barh(sec_names, min_maths_scores)
plt.xlabel("Maths marks")
plt.ylabel("Section")
plt.title("Minimum Maths Marks of Each Section")
plt.show()

link for data files:
    
csv file 5

Experiment 6: program to measure central tendency and dispersion of given data

        
import pandas as pd
import numpy as np
# Central tendency (mean, median, mode) and dispersion (range, variance,
# standard deviation) of columns in student_marksheet_new.csv.
df = pd.read_csv("student_marksheet_new.csv")

# display() is only defined inside IPython/Jupyter; print() works in a
# notebook and in a plain script alike.
print(df.head(5))
print()

condition_c = df['Section'].isin(['C'])
df_c = df.loc[condition_c]
avg_percentage_of_c = df_c["Percentage"].mean()

print(f"Average percentage of Section C: {avg_percentage_of_c}")

median = df["Percentage"].median()
print(f"Median of Total Percentage of all sections: {median}")

condition_A = df['Section'].isin(['A'])
df_a = df.loc[condition_A]

# mode() returns a Series (there may be ties); report its first entry.
mode_of_science_of_section_A = df_a["Science"].mode()
print(f"Mode of marks of Science in Section A: {mode_of_science_of_section_A.iloc[0]}")


total_marks_max = max(df['Total'])
total_marks_min = min(df['Total'])
print(f"Range of Total marks of all section: {total_marks_max- total_marks_min}")

maths_sec_a = df_a["Maths"]
variance = np.var(maths_sec_a)
print(f"Variance of maths marks: {variance}")

history_sec_a = df_a["History"]
standard_deviation = np.std(history_sec_a)

# The value is a standard deviation (np.std), so label it as one.
print(f"Standard deviation of History marks of section A:  {standard_deviation}")


link for data files:
    
csv file 6

Experiment 7: program to display different types of distributions

        
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the company profit data and give the profit column a short name.
df2 = pd.read_csv("CompanyABCProfit.csv").rename(
    columns={"Profit(Rs '000)": "profit"}
)
# The preview result is unused in a script; it only shows up in a notebook cell.
df2.head()


from scipy.stats import shapiro

# Shapiro-Wilk test on the profit column, then a histogram with a KDE curve
# and a dashed line at the mean; the test result goes into the title.
profit = df2['profit']
stat2, p2 = shapiro(profit)

# p > 0.05 is the threshold this script uses for reporting "normally distributed".
s2 = 'It is normally distributed.' if p2 > 0.05 else 'It is not normally distributed.'

plt.figure(figsize=(20,6))

# Right-hand cell of a 1-row, 2-column grid.
plt.subplot(1, 2, 2)
sns.histplot(profit, kde=True, color='green')
plt.axvline(profit.mean(), color='k', linestyle='dashed', linewidth=2)

plt.title(f"Stats:{stat2!s} ,  P value:{p2!s}  , {s2}")

plt.show()

link for data files:
    
csv file 7

Experiment 8: Program to implement linear and multiple regression

        
#linear regression

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
 
# Simple linear regression: load the salary data, split it into train and
# test sets, fit a LinearRegression model and predict the test set.
dataset = pd.read_csv('Salary_Data.csv')
# display() is only defined inside IPython/Jupyter; print() works in a
# notebook and in a plain script alike.
print(dataset.head())

X = dataset.iloc[:, :-1].values  #independent variable array
y = dataset.iloc[:,1].values  #dependent variable vector


from sklearn.model_selection import train_test_split
# One third of the rows are held out for testing; random_state fixes the split.
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=1/3,random_state=0)


from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(X_train,y_train) #actually produces the linear eqn for the data

# predicting the test set results
y_pred = regressor.predict(X_test)

# Bare expressions only echo in a notebook cell; print so a script shows them too.
print("Predicted salaries:", y_pred)
print("Actual salaries:", y_test)
 
def _plot_salary_fit(points_x, points_y, title):
    """Scatter the given points in red over the blue training-set fit line."""
    plt.scatter(points_x, points_y, color='red')
    # The line is always drawn from the training inputs and their predictions.
    plt.plot(X_train, regressor.predict(X_train), color='blue')
    plt.title(title)

    plt.xlabel("Years of experience")
    plt.ylabel("Salaries")
    plt.show()


# Training-set points against the fitted line.
_plot_salary_fit(X_train, y_train, "Salary vs Experience (Training set)")

#plot for the TEST
_plot_salary_fit(X_test, y_test, "Salary vs Experience (Testing set)")


    
  


#multiple regression  

import pandas as pd
from sklearn.compose import ColumnTransformer
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Load dataset
# Multiple linear regression: encode the categorical column, fit on an
# 80/20 train/test split and report MSE and R^2 on the test set.

# Load dataset from the working directory, like the other experiments do;
# the original absolute D:\ path only existed on the author's machine.
dataset = pd.read_csv('50_Startups.csv')

# Independent and dependent variables (every column but the last / the last)
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

# Encode categorical data: column 3 is label-encoded, then one-hot encoded.
# The one-hot columns come first in the output and the untouched
# ("passthrough") columns follow them.
labelencoder = LabelEncoder()
X[:, 3] = labelencoder.fit_transform(X[:, 3])
onehotencoder = ColumnTransformer([("Spend", OneHotEncoder(), [3])], remainder='passthrough')
X = onehotencoder.fit_transform(X)

# Avoiding the Dummy Variable Trap (drop the first one-hot column)
X = X[:, 1:]

# Splitting the dataset into training and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Fitting Multiple Linear Regression to the Training set
regressor = LinearRegression()
regressor.fit(X_train, y_train)

# Predicting the Test set results
y_pred = regressor.predict(X_test)

# Evaluating the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'Mean Squared Error: {mse:.2f}')
print(f'Coefficient of Determination: {r2:.2f}')

# Plotting the results in 3D
# 3D plot of the test set against two numeric features, with the fitted
# regression plane drawn over the same two features.

# After one-hot encoding, the leading columns of X are the category dummies
# (one was dropped to avoid the dummy-variable trap); the passthrough columns
# follow. The original plotted columns 0 and 1, i.e. dummies, under spend labels.
n_dummy_cols = len(onehotencoder.named_transformers_["Spend"].categories_[0]) - 1
rd_col = n_dummy_cols         # first passthrough column
admin_col = n_dummy_cols + 1  # second passthrough column
# NOTE(review): assumes the first two CSV columns are R&D and Administration
# spend, as the axis labels below imply - confirm against the data file.

X_plot = np.asarray(X_test, dtype=float)
rd_test = X_plot[:, rd_col]
admin_test = X_plot[:, admin_col]

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# Scatter plot of actual values
ax.scatter(rd_test, admin_test, y_test, color='red', label='Actual')

# Meshgrid for the prediction surface. A fixed number of grid points keeps
# the grid small whatever the feature scale (a 0.1 step does not).
x0, x1 = np.meshgrid(np.linspace(rd_test.min(), rd_test.max(), 30),
                     np.linspace(admin_test.min(), admin_test.max(), 30))

# Predicting values for the grid: the two plotted features vary, every other
# feature is held at its test-set mean so its coefficient still contributes.
other_cols = np.ones(X_plot.shape[1], dtype=bool)
other_cols[[rd_col, admin_col]] = False
baseline = regressor.intercept_ + np.dot(regressor.coef_[other_cols],
                                         X_plot.mean(axis=0)[other_cols])
y_pred_grid = baseline + regressor.coef_[rd_col] * x0 + regressor.coef_[admin_col] * x1

# Plotting the prediction surface
ax.plot_surface(x0, x1, y_pred_grid, color='blue', alpha=0.5)

# Adding labels, title, and legend to the plot
ax.set_xlabel('R&D Spend')
ax.set_ylabel('Administration Spend')
ax.set_zlabel('Profit')
ax.set_title('Multiple Regression Plot')

# Manually adding legend (plot_surface does not supply a legend handle here)
red_patch = plt.Line2D([0], [0], linestyle="none", marker='o', color='red', label='Actual')
blue_patch = plt.Line2D([0], [0], linestyle="none", marker='s', color='blue', label='Predicted')
ax.legend(handles=[red_patch, blue_patch])

plt.show()
link for data files:
csv file 8

Experiment 9: program to implement clustering and segmentation

        
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
import seaborn as sns

# Load the customer data from the working directory, like the other
# experiments do; the original absolute D:\ path only existed on the
# author's machine.
customers_df = pd.read_csv('Mall_Customers.csv')

# The two features used for clustering.
X = customers_df[['Annual Income', 'Spending Score']]

# Elbow method: fit K-Means for K = 1..10 and plot each model's inertia.
cluster_counts = range(1, 11)
inertia = []
for k in cluster_counts:
    # fit() returns the fitted estimator, so construction and fit can be chained.
    kmeans = KMeans(n_clusters=k, random_state=42).fit(X)
    inertia += [kmeans.inertia_]

plt.figure(figsize=(8, 5))
plt.plot(cluster_counts, inertia, marker='o')
plt.xlabel('Number of Clusters (K)')
plt.ylabel('Inertia')
plt.title('Elbow Method for Optimal K')
plt.show()

# Final segmentation: cluster the customers into 5 groups and colour the
# income / spending scatter plot by cluster label.
kmeans = KMeans(n_clusters=5, random_state=42)
kmeans.fit(X)
# labels_ holds the cluster index assigned to each row during fit().
customers_df['Cluster'] = kmeans.labels_

plt.figure(figsize=(10, 6))
sns.scatterplot(
    data=customers_df,
    x='Annual Income',
    y='Spending Score',
    hue='Cluster',
    palette='Set1',
)
plt.title('Customer Segmentation')
plt.xlabel('Annual Income')
plt.ylabel('Spending Score')
plt.show()

link for data files:
    
csv file 9

Comments

Popular posts from this blog

Machine Learning

Blockchain Experiments

java exp