Kssaijeshwanth

Wednesday, November 10, 2021

IV CSE 3 ML LAB 2021

EX NO: 1

DECISION TREE BASED ID3 ALGORITHM

DATE:

AIM:

To write a program to demonstrate the working of the decision tree based ID3 algorithm. Use an appropriate data set for building the decision tree and apply this knowledge to classify a new sample.

Source Code:

import numpy as np import math

from data_loader import read_data

class Node:

def init (self, attribute): self.attribute = attribute self.children = [] self.answer = ""

def str (self): return self.attribute

def subtables(data, col, delete): dict = {}

items = np.unique(data[:, col])

count = np.zeros((items.shape[0], 1), dtype=np.int32) for x in range(items.shape[0]):

for y in range(data.shape[0]):

if data[y, col] == items[x]: count[x] += 1

for x in range(items.shape[0]):

dict[items[x]] = np.empty((int(count[x]), data.shape[1]), dtype="|S32")

pos = 0

for y in range(data.shape[0]): if data[y, col] == items[x]:

dict[items[x]][pos] = data[y] pos += 1

if delete:

dict[items[x]] = np.delete(dict[items[x]], col, 1) return items, dict

def entropy(S):

Iitems = np.unique(S) if items.size == 1:

return 0

counts = np.zeros((items.shape[0], 1)) sums = 0

for x in range(items.shape[0]):

counts[x] = sum(S == items[x]) / (S.size * 1.0)

for count in counts:

sums += -1 * count * math.log(count, 2) return sums

def gain_ratio(data, col):

items, dict = subtables(data, col, delete=False)

total_size = data.shape[0]

entropies = np.zeros((items.shape[0], 1)) intrinsic = np.zeros((items.shape[0], 1)) for x in range(items.shape[0]):

ratio = dict[items[x]].shape[0]/(total_size * 1.0) entropies[x] = ratio * entropy(dict[items[x]][:, -1]) intrinsic[x] = ratio * math.log(ratio, 2)

total_entropy = entropy(data[:, -1]) iv = -1 * sum(intrinsic)

for x in range(entropies.shape[0]): total_entropy -= entropies[x]

return total_entropy / iv

def create_node(data, metadata):

if (np.unique(data[:, -1])).shape[0] == 1: node = Node("")

node.answer = np.unique(data[:, -1])[0] return node

gains = np.zeros((data.shape[1] - 1, 1)) for col in range(data.shape[1] - 1):

gains[col] = gain_ratio(data, col) split = np.argmax(gains)

node = Node(metadata[split])

metadata = np.delete(metadata, split, 0)

items, dict = subtables(data, split, delete=True)

for x in range(items.shape[0]):

child = create_node(dict[items[x]], metadata) node.children.append((items[x], child))

return node def empty(size):

s = ""

for x in range(size): s += " "

return s

def print_tree(node, level): if node.answer != "":

print(empty(level), node.answer) return

print(empty(level), node.attribute) for value, n in node.children:

print(empty(level + 1), value) print_tree(n, level + 2)

metadata, traindata = read_data("tennis.csv") data = np.array(traindata)

node = create_node(data, metadata) print_tree(node, 0)

Data_loader.py

import csv

def read_data(filename):

with open(filename, 'r') as csvfile:

datareader = csv.reader(csvfile, delimiter=',') headers = next(datareader)

metadata = [] traindata = []

for name in headers: metadata.append(name)

for row in datareader: traindata.append(row)

return (metadata, traindata)

Tennis.csv

outlook,temperature,humidity,wind, answer sunny,hot,high,weak,no sunny,hot,high,strong,no overcast,hot,high,weak,yes rain,mild,high,weak,yes rain,cool,normal,weak,yes rain,cool,normal,strong,no overcast,cool,normal,strong,yes sunny,mild,high,weak,no sunny,cool,normal,weak,yes rain,mild,normal,weak,yes sunny,mild,normal,strong,yes overcast,mild,high,strong,yes overcast,hot,normal,weak,yes rain,mild,high,strong,no

Output

outlook

overcast b'yes'

rain

wind

b'strong' b'no' b'weak' b'yes'

sunny

humidity b'high' b'no'

b'normal' b'yes

RESULT:

EX NO: 2

BACK PROPAGATION ALGORITHM

AIM:

To write a program for implementing the Back propagation algorithm and test the same using appropriate data sets.

Source Code:

import numpy as np

X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)

y = np.array(([92], [86], [89]), dtype=float)

X = X/np.amax(X,axis=0) # maximum of X array longitudinally y = y/100

#Sigmoid Function def sigmoid (x):

return 1/(1 + np.exp(-x))

#Derivative of Sigmoid Function def derivatives_sigmoid(x):

return x * (1 - x)

#Variable initialization

epoch=7000 #Setting training iterations lr=0.1 #Setting learning rate

inputlayer_neurons = 2 #number of features in data set hiddenlayer_neurons = 3 #number of hidden layers neurons output_neurons = 1 #number of neurons at output layer #weight and bias initialization

wh=np.random.uniform(size=(inputlayer_neurons,hiddenlayer_neurons)) bh=np.random.uniform(size=(1,hiddenlayer_neurons)) wout=np.random.uniform(size=(hiddenlayer_neurons,output_neurons)) bout=np.random.uniform(size=(1,output_neurons))

#draws a random range of numbers uniformly of dim x*y for i in range(epoch):

#Forward Propogation hinp1=np.dot(X,wh) hinp=hinp1 + bh hlayer_act = sigmoid(hinp)

outinp1=np.dot(hlayer_act,wout) outinp= outinp1+ bout

output = sigmoid(outinp)

#Backpropagation EO = y-output

outgrad = derivatives_sigmoid(output) d_output = EO* outgrad

EH = d_output.dot(wout.T)

hiddengrad = derivatives_sigmoid(hlayer_act)#how much hidden layer wts contributed to error

d_hiddenlayer = EH * hiddengrad

wout += hlayer_act.T.dot(d_output) *lr# dotproduct of nextlayererror and currentlayerop

# bout += np.sum(d_output, axis=0,keepdims=True) *lr wh += X.T.dot(d_hiddenlayer) *lr

#bh += np.sum(d_hiddenlayer, axis=0,keepdims=True) *lr print("Input: \n" + str(X))

print("Actual Output: \n" + str(y)) print("Predicted Output: \n" ,output)

output

Input:

[[ 0.66666667 1. ]

[ 0.33333333 0.55555556]

[ 1. 0.66666667]]

Actual Output: [[ 0.92]

[ 0.86]

[ 0.89]]

Predicted Output: [[ 0.89559591]

[ 0.88142069]

[ 0.8928407 ]]

RESULT:

EX NO: 3

MULTILAYER PERCEPTRON

DATE:

AIM:

To write a program for implementing the classification using Multilayer perceptron.

Source Code:

! pip install res-mlp-pytorch

! pip install torch-optimizer

import numpy as np

import pandas as pd

import os

import copy

import time

import torch

import torch.nn as nn

import cv2

import matplotlib.pyplot as plt

import copy

import time

import albumentations as A

import torch_optimizer as optim

from res_mlp_pytorch import ResMLP

from PIL import Image

from albumentations.pytorch import ToTensorV2

from torch.utils.data import Dataset, DataLoader

class FoodDataset(Dataset):

def __init__(self, data_type=None, transforms=None):

self.path = '../input/food5k/Food-5K/' + data_type + '/'

self.images_name = os.listdir(self.path)

self.transforms = transforms

def __len__(self):

return len(self.images_name)

def __getitem__(self, idx):

data = self.images_name[idx]

label = data.split('_')[0]

label = int(label)

label = torch.tensor(label)

image = cv2.imread(self.path + data)

image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

if self.transforms:

aug = self.transforms(image=image)

image = aug['image']

return (image, label)

train_data = FoodDataset('training',

A.Compose([

A.RandomResizedCrop(256, 256),

A.HorizontalFlip(),

A.Normalize(),

ToTensorV2()

]))

val_data = FoodDataset('validation',

A.Compose([

A.Resize(384, 384),

A.CenterCrop(256, 256),

A.Normalize(),

ToTensorV2(),

]))

test_data = FoodDataset('evaluation',

A.Compose([

A.Resize(384, 384),

A.CenterCrop(256, 256),

A.Normalize(),

ToTensorV2(),

]))

dataloaders = {

'train': DataLoader(train_data, batch_size=32, shuffle=True, num_workers=4),

'val': DataLoader(val_data, batch_size=32, shuffle=True, num_workers=4),

'test': DataLoader(test_data, batch_size=32, shuffle=True, num_workers=4)

}

dataset_sizes = {

'train': len(train_data),

'val': len(val_data),

'test': len(test_data)

}

def train_model(model, criterion, optimizer, epochs=1):

since = 0.0

best_model_wts = copy.deepcopy(model.state_dict())

best_loss = 0.0

best_acc = 0

for ep in range(epochs):

print(f"Epoch {ep}/{epochs-1}")

print("-"*10)

for phase in ['train', 'val']:

if phase == 'train':

model.train()

else:

model.eval()

running_loss = 0.0

running_corrects = 0

for images, labels in dataloaders[phase]:

images = images.to(device)

labels = labels.to(device)

optimizer.zero_grad()

with torch.set_grad_enabled(phase == 'train'):

outputs = model(images)

_, preds = torch.max(outputs, 1)

loss = criterion(outputs, labels)

if phase == 'train':

loss.backward()

optimizer.step()

running_loss += loss.item() * images.size(0)

running_corrects += torch.sum(preds == labels.data)

epoch_loss = running_loss / dataset_sizes[phase]

epoch_acc = running_corrects.double() / dataset_sizes[phase]

print(f"{phase} Loss:{epoch_loss:.4f} Acc:{epoch_acc:.4f}")

if phase == 'val':

if ep == 0:

best_loss = epoch_loss

best_acc = epoch_acc

best_model_wts = copy.deepcopy(model.state_dict())

else:

if epoch_loss < best_loss:

best_loss = epoch_loss

best_acc = epoch_acc

best_model_wts = copy.deepcopy(model.state_dict())

print()

time_elapsed = time.time() - since

print(f'Training complete in {time_elapsed // 60}m {time_elapsed % 60}s')

print(f'Best val loss: {best_loss:.4f}')

print(f'Best acc: {best_acc}')

model.load_state_dict(best_model_wts)

return model

# Train The Model

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = ResMLP(image_size=256, patch_size=16, dim=512, depth=12, num_classes=2)

model = model.to(device)

criterion = nn.CrossEntropyLoss()

optimizer = optim.Lamb(model.parameters(), lr=0.005, weight_decay=0.2)

best_model = train_model(model, criterion, optimizer, epochs=20)

Output

# ResMLP

Best val loss: 0.2860

Best acc: 0.891

# ResNet-18

Best val loss: 0.0365

Best acc: 0.986

# ResNet-50

Best val loss: 0.0245

Best acc: 0.993

RESULT:

EX NO: 4

NAÏVE BAYESIAN CLASSIFIER

DATE:

AIM

To write a program to implement the naïve Bayesian classifier for a sample training data set stored as a .CSV file. Compute the accuracy of the classifier, considering few test data sets.

Source Code:

import csv import random import math

def loadCsv(filename):

lines = csv.reader(open(filename, "r")); dataset = list(lines)

for i in range(len(dataset)):

#converting strings into numbers for processing dataset[i] = [float(x) for x in dataset[i]]

return dataset

def splitDataset(dataset, splitRatio): #67% training size

trainSize = int(len(dataset) * splitRatio); trainSet = []

copy = list(dataset);

while len(trainSet) < trainSize:

#generate indices for the dataset list randomly to pick ele for training data index = random.randrange(len(copy)); trainSet.append(copy.pop(index))

return [trainSet, copy]

def separateByClass(dataset):

separated = {}

#creates a dictionary of classes 1 and 0 where the values are the instacnes belonging to each class

for i in range(len(dataset)): vector = dataset[i]

if (vector[-1] not in separated): separated[vector[-1]] = []

separated[vector[-1]].append(vector) return separated

def mean(numbers):

return sum(numbers)/float(len(numbers))

def stdev(numbers):

avg = mean(numbers)

variance = sum([pow(x-avg,2) for x in numbers])/float(len(numbers)-1) return math.sqrt(variance)

def summarize(dataset):

summaries = [(mean(attribute), stdev(attribute)) for attribute in zip(*dataset)]; del summaries[-1]

return summaries

def summarizeByClass(dataset):

separated = separateByClass(dataset); summaries = {}

for classValue, instances in separated.items():

#summaries is a dic of tuples(mean,std) for each class value summaries[classValue] = summarize(instances)

return summaries

def calculateProbability(x, mean, stdev):

exponent = math.exp(-(math.pow(x-mean,2)/(2*math.pow(stdev,2)))) return (1 / (math.sqrt(2*math.pi) * stdev)) * exponent

def calculateClassProbabilities(summaries, inputVector):

probabilities = {}

for classValue, classSummaries in summaries.items():#class and attribute information as mean and sd

probabilities[classValue] = 1

for i in range(len(classSummaries)):

mean, stdev = classSummaries[i] #take mean and sd of every attribute for class 0 and 1 seperaely

x = inputVector[i] #testvector's first attribute probabilities[classValue] *= calculateProbability(x, mean, stdev);#use

normal dist

return probabilities

def predict(summaries, inputVector):

probabilities = calculateClassProbabilities(summaries, inputVector) bestLabel, bestProb = None, -1

for classValue, probability in probabilities.items():#assigns that class which has he

highest prob

if bestLabel is None or probability > bestProb: bestProb = probability

bestLabel = classValue return bestLabel

def getPredictions(summaries, testSet): predictions = []

for i in range(len(testSet)):

result = predict(summaries, testSet[i]) predictions.append(result)

return predictions

def getAccuracy(testSet, predictions):

correct = 0

for i in range(len(testSet)):

if testSet[i][-1] == predictions[i]: correct += 1

return (correct/float(len(testSet))) * 100.0

def main():

filename = '5data.csv' splitRatio = 0.67

dataset = loadCsv(filename);

trainingSet, testSet = splitDataset(dataset, splitRatio)

print('Split {0} rows into train={1} and test={2} rows'.format(len(dataset), len(trainingSet), len(testSet)))

# prepare model

summaries = summarizeByClass(trainingSet); # test model

predictions = getPredictions(summaries, testSet) accuracy = getAccuracy(testSet, predictions)

print('Accuracy of the classifier is : {0}%'.format(accuracy)) main()

Output

confusion matrix is as follows [[17 0 0]

[ 0 17 0]

[ 0 0 11]]

Accuracy metrics

precision recall f1-score support

0 1.00 1.00 1.00

1 1.00 1.00 1.00

2 1.00 1.00 1.00

avg / total

1.00

1.00 45

RESULT:

EX NO: 5

NAÏVE BAYESIAN CLASSIFIER USING BUILT-IN FUNCTIONS

DATE:

AIM:

Assuming a set of documents that need to be classified, use the naïve Bayesian Classifier model to perform this task. Built-in Java classes/API can be used to write the program. Calculate the accuracy, precision, and recall for your data set.

Source Code:

import pandas as pd msg=pd.read_csv('naivetext1.csv',names=['message','label']) print('The dimensions of the dataset',msg.shape) msg['labelnum']=msg.label.map({'pos':1,'neg':0})

X=msg.message y=msg.labelnum print(X)

print(y)

#splitting the dataset into train and test data

from sklearn.model_selection import train_test_split xtrain,xtest,ytrain,ytest=train_test_split(X,y) print(xtest.shape)

print(xtrain.shape) print(ytest.shape) print(ytrain.shape)

#output of count vectoriser is a sparse matrix

from sklearn.feature_extraction.text import CountVectorizer count_vect = CountVectorizer()

xtrain_dtm = count_vect.fit_transform(xtrain) xtest_dtm=count_vect.transform(xtest) print(count_vect.get_feature_names())

df=pd.DataFrame(xtrain_dtm.toarray(),columns=count_vect.get_feature_names()) print(df)#tabular representation

print(xtrain_dtm) #sparse matrix representation

# Training Naive Bayes (NB) classifier on training data. from sklearn.naive_bayes import MultinomialNB

clf = MultinomialNB().fit(xtrain_dtm,ytrain) predicted = clf.predict(xtest_dtm)

#printing accuracy metrics from sklearn import metrics print('Accuracy metrics')

print('Accuracy of the classifer is',metrics.accuracy_score(ytest,predicted)) print('Confusion matrix')

print(metrics.confusion_matrix(ytest,predicted))

print('Recall and Precison ') print(metrics.recall_score(ytest,predicted)) print(metrics.precision_score(ytest,predicted))

'''docs_new = ['I like this place', 'My boss is not my saviour']

X_new_counts = count_vect.transform(docs_new) predictednew = clf.predict(X_new_counts)

for doc, category in zip(docs_new, predictednew):

print('%s->%s' % (doc, msg.labelnum[category]))'''

I love this sandwich,pos This is an amazing place,pos

I feel very good about these beers,pos This is my best work,pos

What an awesome view,pos

I do not like this restaurant,neg I am tired of this stuff,neg

I can't deal with this,neg He is my sworn enemy,neg My boss is horrible,neg

This is an awesome place,pos

I do not like the taste of this juice,neg I love to dance,pos

I am sick and tired of this place,neg What a great holiday,pos

That is a bad locality to stay,neg

We will have good fun tomorrow,pos I went to my enemy's house today,neg

OUTPUT

['about', 'am', 'amazing', 'an', 'and', 'awesome', 'beers', 'best', 'boss', 'can', 'deal',

'do', 'enemy', 'feel', 'fun', 'good', 'have', 'horrible', 'house', 'is', 'like', 'love', 'my',

'not', 'of', 'place', 'restaurant', 'sandwich', 'sick', 'stuff', 'these', 'this', 'tired', 'to',

'today', 'tomorrow', 'very', 'view', 'we', 'went', 'what', 'will', 'with', 'work'] about am amazing an and awesome beers best boss can ... today \

1 0 0 0 0 0 1

0 0 0 ... 0

0 0 0 0 0 0 0

1 0 0 ... 0

0 0 1 1 0 0

0 0 0 0 ... 0

0 0 0 0 0 0 0

0 0 0 ... 1

0 0 0 0 0 0 0

0 0 0 ... 0

0 1 0 0 1

0 0 0 0 0 ... 0

0 0 0 0 0 0 0

0 0 1 ... 0

0 0 0 0 0 0 0

0 0 0 ... 0

0 1 0 0 0 0 0

0 0 0 ... 0

0 0 0 1 0 1 0

0 0 0 ... 0

10 0 0

0 0 0 0 0 0 0 0 ... 0

11 0 0

0 0 0 0 0 0 1 0 ... 0

12 0 0

0 1 0 1 0 0 0 0 ... 0

tomorrow very view we went what will with work 0 0 1 0 0 0 0 0 0 0

0 0

1 0

0 0

RESULT:

EX NO: 6

NAÏVE BAYESIAN CLASSIFIER

DATE:

AIM:

To write a program to construct a Bayesian network considering medical data. Use this model to demonstrate the diagnosis of heart patients using standard Heart Disease Data Set. You can use Java/Python ML library classes/API.

Source Code:

import bayespy as bp

import numpy as np

import csv

from colorama import init

from colorama import Fore, Back, Style

init()

# Define Parameter Enum values

#Age

ageEnum = {'SuperSeniorCitizen':0, 'SeniorCitizen':1, 'MiddleAged':2, 'Youth':3,

'Teen':4}

# Gender

genderEnum = {'Male':0, 'Female':1}

# FamilyHistory

familyHistoryEnum = {'Yes':0, 'No':1}

# Diet(Calorie Intake)

dietEnum = {'High':0, 'Medium':1, 'Low':2}

# LifeStyle

lifeStyleEnum = {'Athlete':0, 'Active':1, 'Moderate':2, 'Sedetary':3}

# Cholesterol

cholesterolEnum = {'High':0, 'BorderLine':1, 'Normal':2}

# HeartDisease

heartDiseaseEnum = {'Yes':0, 'No':1}

#heart_disease_data.csv

with open('heart_disease_data.csv') as csvfile:

lines = csv.reader(csvfile)

dataset = list(lines)

data = []

for x in dataset:

data.append([ageEnum[x[0]],genderEnum[x[1]],familyHistoryEnum[x[2]],dietEnum[x[

3]],lifeStyleEnum[x[4]],cholesterolEnum[x[5]],heartDiseaseEnum[x[6]]])

# Training data for machine learning todo: should import from csv

data = np.array(data)

N = len(data)

# Input data column assignment

p_age = bp.nodes.Dirichlet(1.0*np.ones(5))

age = bp.nodes.Categorical(p_age, plates=(N,))

age.observe(data[:,0])

p_gender = bp.nodes.Dirichlet(1.0*np.ones(2))

gender = bp.nodes.Categorical(p_gender, plates=(N,))

gender.observe(data[:,1])

p_familyhistory = bp.nodes.Dirichlet(1.0*np.ones(2))

familyhistory = bp.nodes.Categorical(p_familyhistory, plates=(N,))

familyhistory.observe(data[:,2])

p_diet = bp.nodes.Dirichlet(1.0*np.ones(3))

diet = bp.nodes.Categorical(p_diet, plates=(N,))

diet.observe(data[:,3])

p_lifestyle = bp.nodes.Dirichlet(1.0*np.ones(4))

lifestyle = bp.nodes.Categorical(p_lifestyle, plates=(N,))

lifestyle.observe(data[:,4])

p_cholesterol = bp.nodes.Dirichlet(1.0*np.ones(3))

cholesterol = bp.nodes.Categorical(p_cholesterol, plates=(N,))

cholesterol.observe(data[:,5])

# Prepare nodes and establish edges

# np.ones(2) -> HeartDisease has 2 options Yes/No

# plates(5, 2, 2, 3, 4, 3) -> corresponds to options present for domain values

p_heartdisease = bp.nodes.Dirichlet(np.ones(2), plates=(5, 2, 2, 3, 4, 3))

heartdisease = bp.nodes.MultiMixture([age, gender, familyhistory, diet, lifestyle,

cholesterol], bp.nodes.Categorical, p_heartdisease)

heartdisease.observe(data[:,6])

p_heartdisease.update()

# Sample Test with hardcoded values

#print("Sample Probability")

#print("Probability(HeartDisease|Age=SuperSeniorCitizen, Gender=Female,

FamilyHistory=Yes, DietIntake=Medium, LifeStyle=Sedetary, Cholesterol=High)")

#print(bp.nodes.MultiMixture([ageEnum['SuperSeniorCitizen'], genderEnum['Female'],

familyHistoryEnum['Yes'], dietEnum['Medium'], lifeStyleEnum['Sedetary'],

cholesterolEnum['High']], bp.nodes.Categorical, p_heartdisease).get_moments()[0]

[heartDiseaseEnum['Yes']])

# Interactive Test

m = 0

while m == 0:

print("\n")

res = bp.nodes.MultiMixture([int(input('Enter Age: ' + str(ageEnum))),

int(input('Enter Gender: ' + str(genderEnum))), int(input('Enter FamilyHistory: ' +

str(familyHistoryEnum))), int(input('Enter dietEnum: ' + str(dietEnum))),

int(input('Enter LifeStyle: ' + str(lifeStyleEnum))), int(input('Enter Cholesterol: ' +

str(cholesterolEnum)))], bp.nodes.Categorical, p_heartdisease).get_moments()[0]

[heartDiseaseEnum['Yes']]

print("Probability(HeartDisease) = " + str(res))

#print(Style.RESET_ALL)

m = int(input("Enter for Continue:0, Exit :1 "))

OUTPUT:

Enter Age: {'SuperSeniorCitizen': 0, 'SeniorCitizen': 1, 'MiddleAged': 2, 'Youth': 3, 'Teen': 4}0

Enter Gender: {'Male': 0, 'Female': 1}0

Enter FamilyHistory: {'Yes': 0, 'No': 1}0

Enter dietEnum: {'High': 0, 'Medium': 1, 'Low': 2}0

Enter LifeStyle: {'Athlete': 0, 'Active': 1, 'Moderate': 2, 'Sedetary': 3}0

Enter Cholesterol: {'High': 0, 'BorderLine': 1, 'Normal': 2}0

Probability(HeartDisease) = 0.5

Enter for Continue:0, Exit :1 0

Enter Age: {'SuperSeniorCitizen': 0, 'SeniorCitizen': 1, 'MiddleAged': 2, 'Youth': 3, 'Teen': 4}4

Enter Gender: {'Male': 0, 'Female': 1}0

Enter FamilyHistory: {'Yes': 0, 'No': 1}0

Enter dietEnum: {'High': 0, 'Medium': 1, 'Low': 2}1

Enter LifeStyle: {'Athlete': 0, 'Active': 1, 'Moderate': 2, 'Sedetary': 3}3

Enter Cholesterol: {'High': 0, 'BorderLine': 1, 'Normal': 2}2

Probability(HeartDisease) = 0.13784165696493575

Enter for Continue:0, Exit :1 0

Enter Age: {'SuperSeniorCitizen': 0, 'SeniorCitizen': 1, 'MiddleAged': 2, 'Youth': 3, 'Teen': 4}3

Enter Gender: {'Male': 0, 'Female': 1}1

Enter FamilyHistory: {'Yes': 0, 'No': 1}0

Enter dietEnum: {'High': 0, 'Medium': 1, 'Low': 2}1

Enter LifeStyle: {'Athlete': 0, 'Active': 1, 'Moderate': 2, 'Sedetary': 3}0

Enter Cholesterol: {'High': 0, 'BorderLine': 1, 'Normal': 2}1

Probability(HeartDisease) = 0.2689414213699951

Enter for Continue:0, Exit :1

EX NO: 7

EM ALGORITHM

DATE:

AIM:

To apply EM algorithm to cluster a set of data stored in a .CSV file. Use the same data set for clustering using k-Means algorithm. Compare the results of these two algorithms and comment on the quality of clustering. You can add Java/Python ML library classes/API in the program.

Source Code:

import numpy as np

import matplotlib.pyplot as plt

from sklearn.datasets.samples_generator import make_blobs X, y_true = make_blobs(n_samples=100, centers = 4,Cluster_std=0.60,random_state=0)

X = X[:, ::-1]

#flip axes for better plotting

from sklearn.mixture import GaussianMixture

gmm = GaussianMixture (n_components = 4).fit(X) lables = gmm.predict(X)

plt.scatter(X[:, 0], X[:, 1], c=labels, s=40, cmap=‟viridis‟); probs = gmm.predict_proba(X)

print(probs[:5].round(3))

size = 50 * probs.max(1) ** 2 # square emphasizes differences plt.scatter(X[:, 0], X[:, 1], c=labels, cmap=‟viridis‟, s=size);

from matplotlib.patches import Ellipse

def draw_ellipse(position, covariance, ax=None, **kwargs); “””Draw an ellipse with a given position and covariance”””

Ax = ax or plt.gca()

# Convert covariance to principal axes

if covariance.shape ==(2,2):

U, s, Vt = np.linalg.svd(covariance)

Angle = np.degrees(np.arctan2(U[1, 0], U[0,0])) Width, height = 2 * np.sqrt(s)

else:

angle = 0

width, height = 2 * np.sqrt(covariance)

#Draw the Ellipse

for nsig in range(1,4):

ax.add_patch(Ellipse(position, nsig * width, nsig *height, angle, **kwargs))

def plot_gmm(gmm, X, label=True, ax=None): ax = ax or plt.gca()

labels = gmm.fit(X).predict(X) if label:

ax.scatter(X[:, 0], x[:, 1], c=labels, s=40, cmap=‟viridis‟, zorder=2) else:

ax.scatter(X[:, 0], x[:, 1], s=40, zorder=2) ax.axis(„equal‟)

w_factor = 0.2 / gmm.weights_.max()

for pos, covar, w in zip(gmm.means_, gmm.covariances_, gmm.weights_): draw_ellipse(pos, covar, alpha=w * w_factor)

gmm = GaussianMixture(n_components=4, random_state=42) plot_gmm(gmm, X)

gmm = GaussianMixture(n_components=4, covariance_type=‟full‟, random_state=42)

plot_gmm(gmm, X)

Output

[[1 ,0, 0, 0]

[0 ,0, 1, 0]

[1 ,0, 0, 0]

[1 ,0, 0, 0]]

K-means

from sklearn.cluster import KMeans

#from sklearn import metrics import numpy as np

import matplotlib.pyplot as plt import pandas as pd data=pd.read_csv("kmeansdata.csv") df1=pd.DataFrame(data)

print(df1)

f1 = df1['Distance_Feature'].values f2 = df1['Speeding_Feature'].values

X=np.matrix(list(zip(f1,f2))) plt.plot()

plt.xlim([0, 100])

plt.ylim([0, 50]) plt.title('Dataset') plt.ylabel('speeding_feature') plt.xlabel('Distance_Feature') plt.scatter(f1,f2)

plt.show()

# create new plot and data plt.plot()

colors = ['b', 'g', 'r']

markers = ['o', 'v', 's']

# KMeans algorithm #K = 3

kmeans_model = KMeans(n_clusters=3).fit(X)

plt.plot()

for i, l in enumerate(kmeans_model.labels_):

plt.plot(f1[i], f2[i], color=colors[l], marker=markers[l],ls='None') plt.xlim([0, 100])

plt.ylim([0, 50]) plt.show()

Driver_ID,Distance_Feature,Speeding_Feature

3423311935,71.24,28

3423313212,52.53,25

3423313724,64.54,27

3423311373,55.69,22

3423310999,54.58,25

3423313857,41.91,10

3423312432,58.64,20

3423311434,52.02,8

3423311328,31.25,34

3423312488,44.31,19

3423311254,49.35,40

3423312943,58.07,45

3423312536,44.22,22

3423311542,55.73,19

3423312176,46.63,43

3423314176,52.97,32

3423314202,46.25,35

3423311346,51.55,27

3423310666,57.05,26

3423313527,58.45,30

3423312182,43.42,23

3423313590,55.68,37

3423312268,55.15,18

RESULT:

EX NO: 8

PRINCIPLE COMPONENT ANALYSIS FOR DIMENSIONALITY REDUCTION

DATE:

AIM:

To write a program to implement Principle Component Analysis for Dimensionality Reduction.

Source Code:

# define transform

pca = PCA()

# prepare transform on dataset

pca.fit(data)

# apply transform to dataset

transformed = pca.transform(data)

# define the pipeline

steps = [('pca', PCA()), ('m', LogisticRegression())]

model = Pipeline(steps=steps)

# define the pipeline

steps = [('norm', MinMaxScaler()), ('pca', PCA()), ('m', LogisticRegression())]

model = Pipeline(steps=steps)

# test classification dataset

from sklearn.datasets import make_classification

# define dataset

X, y = make_classification(n_samples=1000, n_features=20, n_informative=15, n_redundant=5, random_state=7)

# summarize the dataset

print(X.shape, y.shape)

(1000, 20) (1000,)

# evaluate pca with logistic regression algorithm for classification

from numpy import mean

from numpy import std

from sklearn.datasets import make_classification

from sklearn.model_selection import cross_val_score

from sklearn.model_selection import RepeatedStratifiedKFold

from sklearn.pipeline import Pipeline

from sklearn.decomposition import PCA

from sklearn.linear_model import LogisticRegression

# define dataset

X, y = make_classification(n_samples=1000, n_features=20, n_informative=15, n_redundant=5, random_state=7)

# define the pipeline

steps = [('pca', PCA(n_components=10)), ('m', LogisticRegression())]

model = Pipeline(steps=steps)

# evaluate model

cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)

n_scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=-1, error_score='raise')

# report performance

print('Accuracy: %.3f (%.3f)' % (mean(n_scores), std(n_scores)))

# evaluate pca with logistic regression algorithm for classification

from numpy import mean

from numpy import std

from sklearn.datasets import make_classification

from sklearn.model_selection import cross_val_score

from sklearn.model_selection import RepeatedStratifiedKFold

from sklearn.pipeline import Pipeline

from sklearn.decomposition import PCA

from sklearn.linear_model import LogisticRegression

# define dataset

X, y = make_classification(n_samples=1000, n_features=20, n_informative=15, n_redundant=5, random_state=7)

# define the pipeline

steps = [('pca', PCA(n_components=10)), ('m', LogisticRegression())]

model = Pipeline(steps=steps)

# evaluate model

cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)

n_scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=-1, error_score='raise')

# report performance

print('Accuracy: %.3f (%.3f)' % (mean(n_scores), std(n_scores)))

Accuracy: 0.816 (0.034)

# compare pca number of components with logistic regression algorithm for classification

from numpy import mean

from numpy import std

from sklearn.datasets import make_classification

from sklearn.model_selection import cross_val_score

from sklearn.model_selection import RepeatedStratifiedKFold

from sklearn.pipeline import Pipeline

from sklearn.decomposition import PCA

from sklearn.linear_model import LogisticRegression

from matplotlib import pyplot

# get the dataset

def get_dataset():

X, y = make_classification(n_samples=1000, n_features=20, n_informative=15, n_redundant=5, random_state=7)

return X, y

# get a list of models to evaluate

def get_models():

models = dict()

for i in range(1,21):

steps = [('pca', PCA(n_components=i)), ('m', LogisticRegression())]

models[str(i)] = Pipeline(steps=steps)

return models

# evaluate a given model using cross-validation

def evaluate_model(model, X, y):

cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)

scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=-1, error_score='raise')

return scores

# define dataset

X, y = get_dataset()

# get the models to evaluate

models = get_models()

# evaluate the models and store results

results, names = list(), list()

for name, model in models.items():

scores = evaluate_model(model, X, y)

results.append(scores)

names.append(name)

print('>%s %.3f (%.3f)' % (name, mean(scores), std(scores)))

# plot model performance for comparison

pyplot.boxplot(results, labels=names, showmeans=True)

pyplot.xticks(rotation=45)

pyplot.show()

Output:

>1 0.542 (0.048)

>2 0.713 (0.048)

>3 0.720 (0.053)

>4 0.723 (0.051)

>5 0.725 (0.052)

>6 0.730 (0.046)

>7 0.805 (0.036)

>8 0.800 (0.037)

>9 0.814 (0.036)

>10 0.816 (0.034)

>11 0.819 (0.035)

>12 0.819 (0.038)

>13 0.819 (0.035)

>14 0.853 (0.029)

>15 0.865 (0.027)

>16 0.865 (0.027)

>17 0.865 (0.027)

>18 0.865 (0.027)

>19 0.865 (0.027)

>20 0.865 (0.027)

RESULT:

EX NO: 8

K-NEAREST NEIGHBOUR ALGORITHM

DATE:

AIM:

To write a program to implement k-Nearest Neighbour algorithm to classify the iris data set. Print both correct and wrong predictions. Java/Python ML library classes can be used for this problem.

Source Code:

import csv import random import math import operator

def loadDataset(filename, split, trainingSet=[] , testSet=[]): with open(filename, 'rb') as csvfile:

lines = csv.reader(csvfile) dataset = list(lines)

for x in range(len(dataset)-1): for y in range(4):

dataset[x][y] = float(dataset[x][y]) if random.random() < split:

trainingSet.append(dataset[x]) else:

testSet.append(dataset[x])

def euclideanDistance(instance1, instance2, length): distance = 0

for x in range(length):

distance += pow((instance1[x] - instance2[x]), 2) return math.sqrt(distance)

def getNeighbors(trainingSet, testInstance, k): distances = []

length = len(testInstance)-1

for x in range(len(trainingSet)):

dist = euclideanDistance(testInstance, trainingSet[x], length) distances.append((trainingSet[x], dist))

distances.sort(key=operator.itemgetter(1)) neighbors = []

for x in range(k):

neighbors.append(distances[x][0]) return neighbors

def

getResponse(neighbors): classVotes = {}

for x in range(len(neighbors)): response = neighbors[x][-1] if response in classVotes:

classVotes[response] += 1

else:

classVotes[response] = 1

sortedVotes =

sorted(classVotes.iteritems(),

reverse=True)

return sortedVotes[0][0]

def getAccuracy(testSet, predictions): correct = 0 for x in range(len(testSet)): key=operator.itemgetter(1

if testSet[x][-1] == predictions[x]: correct += 1

return (correct/float(len(testSet))) * 100.0

def main():

# prepare data trainingSet= [] testSet=[] split = 0.67

loadDataset('knndat.data', split, trainingSet, testSet) print('Train set: ' + repr(len(trainingSet))) print('Test set: ' + repr(len(testSet)))

# generate predictions predictions=[] k=3

for x in range(len(testSet)):

neighbors = getNeighbors(trainingSet, testSet[x],

k) result = getResponse(neighbors) predictions.append(result)

print('> predicted=' + repr(result) + ', actual=' + repr(testSet[x][- 1])) accuracy = getAccuracy(testSet, predictions)

print('Accuracy: ' + repr(accuracy) + '%') main()

OUTPUT

Confusion matrix is as follows

[[11 0 0]

[0 9 1]

[0 1 8]]

Accuracy metrics 0 1.00 1.00 1.00 11

1 0.90 0.90 0.90 10

2 0.89 0.89 0,89 9

Avg/Total 0.93 0.93 0.93 30

RESULT:

EX NO: 8

K-NEAREST NEIGHBOUR ALGORITHM

DATE:

AIM:

To implement the non-parametric Locally Weighted Regression algorithm in order to fit data points. Select appropriate data set for your experiment and draw graphs.

Source Code:

from numpy import * import operator

from os import listdir import matplotlib

import matplotlib.pyplot as plt import pandas as pd

import numpy as np1 import numpy.linalg as np

from scipy.stats.stats import pearsonr

def kernel(point,xmat, k): m,n = np1.shape(xmat)

weights = np1.mat(np1.eye((m))) for j in range(m):

diff = point - X[j]

weights[j,j] = np1.exp(diff*diff.T/(-2.0*k**2)) return weights

def localWeight(point,xmat,ymat,k): wei = kernel(point,xmat,k)

W=(X.T*(wei*X)).I*(X.T*(wei*ymat.T)) return W

def localWeightRegression(xmat,ymat,k): m,n = np1.shape(xmat)

ypred = np1.zeros(m) for i in range(m):

ypred[i] = xmat[i]*localWeight(xmat[i],xmat,ymat,k) return ypred

# load data points

data = pd.read_csv('data10.csv') bill = np1.array(data.total_bill) tip = np1.array(data.tip)

#preparing and add 1 in bill mbill = np1.mat(bill)

mtip = np1.mat(tip)

m= np1.shape(mbill)[1]

one = np1.mat(np1.ones(m))

X= np1.hstack((one.T,mbill.T))

#set k here

ypred = localWeightRegression(X,mtip,2)

SortIndex = X[:,1].argsort(0) xsort = X[SortIndex][:,0]

Output

Wednesday, April 10, 2019

OS LAB 1manual 2 year 2sem)

OS LAB 2 year 2 sem manual

OPERATING SYSTEMS LABORATORY

List of Tasks

1. Simulate the following CPU scheduling algorithms

a) Round Robin b) SJF c) FCFS d) Priority

2. Simulate all file allocation strategies

a) Sequential b) Indexed c) Linked

3. Simulate MVT and MFT

4. Simulate all File Organization Techniques

a) Single level directory b) Two level c) Hierarchical d) DAG

5. Simulate Bankers Algorithm for Dead Lock Avoidance

6. Simulate Bankers Algorithm for Dead Lock Prevention

7. Simulate all page replacement algorithms

a) FIFO b) LRU c) LFU Etc. …

8. Simulate Paging Technique of memory management

9. Control the number of ports opened by the operating system with

a) Semaphore b) monitors

10. Simulate how parent and child processes use shared memory and address space

11. Simulate sleeping barber problem

12. Simulate dining philosopher‘s problem

13. Simulate producer and consumer problem using threads (use java)

14. Simulate little‘s formula to predict next burst time of a process for SJF scheduling algorithm.

15. Develop a code to detect a cycle in wait-for graph

16. Develop a code to convert virtual address to physical address

17. Simulate how operating system allocates frame to process

18. Simulate the prediction of deadlock in operating system when all the processes announce their resource requirement in advance.

Week:1

CPU Scheduling Algorithms

Date:29\12\18

AIM:ToSimulate the following CPU scheduling algorithms

a) Round Robin b) SJF c) FCFS d) Priority

a) Round Robin Cpu Scheduling Algorithm

Description: Assume all the processes arrive at the same time.For round robin scheduling algorithm, read the number of processes/jobs in the system, their CPU burst times, and the size of the time slice. Time slices are assigned to each process in equal portions and in circular order, handling all processes execution. This allows every process to get an equal chance. Calculate the waiting time and turnaround time of each of the processes accordingly.

Program:

#include<stdio.h>

main()

{

int i,j,n,bu[10],wa[10],tat[10],t,ct[10],max;

float awt=0,att=0,temp=0;

clrscr();

printf("Enter the no of processes -- ");

scanf("%d",&n);

for(i=0;i<n;i++)

{

printf("\nEnter Burst Time for process %d -- ", i+1);

scanf("%d",&bu[i]);

ct[i]=bu[i];

}

printf("\nEnter the size of time slice -- ");

scanf("%d",&t);

max=bu[0];

for(i=1;i<n;i++)

if(max<bu[i])

max=bu[i];

for(j=0;j<(max/t)+1;j++)

for(i=0;i<n;i++)

if(bu[i]!=0)

if(bu[i]<=t)

{

tat[i]=temp+bu[i];

temp=temp+bu[i];

bu[i]=0;

}

else

{

bu[i]=bu[i]-t;

temp=temp+t;

}

for(i=0;i<n;i++)

{

wa[i]=tat[i]-ct[i];

att+=tat[i];

awt+=wa[i];

}

printf("\n\tPROCESS\t BURST TIME \t WAITING TIME\tTURNAROUND TIME\n");

for(i=0;i<n;i++)

{

printf("\t%d \t %d \t\t %d \t\t %d \n",i+1,ct[i],wa[i],tat[i]);

}

printf("\nThe Average Turnaround time is -- %f",att/n);

printf("\nThe Average Waiting time is -- %f ",awt/n);

getch();

}

Sample Input:

Enter the no of processes – 3

Enter Burst Time for process 1 – 24

Enter Burst Time for process 2 -- 3

Enter Burst Time for process 3 -- 3

Enter the size of time slice – 3

Sample Output:

The Average Turnaround time is – 15.666667

The Average Waiting time is -- 5.666667

PROCESS BURST TIME WAITING TIME TURNAROUND TIME

1 24 6 30

2 3 4 7

3 3 7 10

Expermental Result:

Input:

Output:

Result:

b) SJF Cpu Scheduling Algorithm data: 05\01\2019

Description: Assume all the processes arrive at the same time. For SJF scheduling algorithm, read the number of processes/jobs in the system, their CPU burst times. Arrange all the jobs in order with respect to their burst times. There may be two jobs in queue with the same execution time, and then FCFS approach is to be performed. Each process will be executed according to the length of its burst time. Then calculate the waiting time and turnaround time of each of the processes accordingly.

Program:

#include<stdio.h>

#include<conio.h>

main()

{

int p[20], bt[20], wt[20], tat[20], i, k, n, temp;

float wtavg, tatavg;

clrscr();

printf("\nEnter the number of processes -- ");

scanf("%d", &n);

for(i=0;i<n;i++)

{

p[i]=i;

printf("Enter Burst Time for Process %d -- ", i);

scanf("%d", &bt[i]);

}

for(i=0;i<n;i++)

for(k=i+1;k<n;k++)

if(bt[i]>bt[k])

{

temp=bt[i];

bt[i]=bt[k];

bt[k]=temp;

temp=p[i];

p[i]=p[k];

p[k]=temp;

}

wt[0] = wtavg = 0;

tat[0] = tatavg = bt[0];

for(i=1;i<n;i++)

{

wt[i] = wt[i-1] +bt[i-1];

tat[i] = tat[i-1] +bt[i];

wtavg = wtavg + wt[i];

tatavg = tatavg + tat[i];

}

printf("\n\t PROCESS \tBURST TIME \t WAITING TIME\t TURNAROUND TIME\n");

for(i=0;i<n;i++)

printf("\n\t P%d \t\t %d \t\t %d \t\t %d", p[i], bt[i], wt[i], tat[i]);

printf("\nAverage Waiting Time -- %f", wtavg/n);

printf("\nAverage Turnaround Time -- %f", tatavg/n);

getch();

}

Sample Input:

Enter the number of processes -- 4

Enter Burst Time for Process 0 -- 6

Enter Burst Time for Process 1 -- 8

Enter Burst Time for Process 2 -- 7

Enter Burst Time for Process 3 -- 3

Sample Output:

PROCESS BURST TIME WAITING TIME TURNAROUND TIME

P3 3 0 3

P0 6 3 9

P2 7 9 16

P1 8 16 24

Average Waiting Time -- 7.000000

Average Turnaround Time -- 13.000000

Expermental Result:

Input:

Output:

Result:

c) FCFS CPU Scheduling Algorithm data :10\01\2019

Description: For FCFS scheduling algorithm, read the number of processes/jobs in the system, their CPU burst times. The scheduling is performed on the basis of arrival time of the processes irrespective of their other parameters. Each process will be executed according to its arrival time. Calculate the waiting time and turnaround time of each of the processes accordingly.

Program:

#include<stdio.h>

#include<conio.h>

main()

{

int bt[20], wt[20], tat[20], i, n;

float wtavg, tatavg;

clrscr();

printf("\nEnter the number of processes -- ");

scanf("%d", &n);

for(i=0;i<n;i++)

{

printf("\nEnter Burst Time for Process %d -- ", i);

scanf("%d", &bt[i]);

}

wt[0] = wtavg = 0;

tat[0] = tatavg = bt[0];

for(i=1;i<n;i++)

{

wt[i] = wt[i-1] +bt[i-1];

tat[i] = tat[i-1] +bt[i];

wtavg = wtavg + wt[i];

tatavg = tatavg + tat[i];

}

printf("\t PROCESS \tBURST TIME \t WAITING TIME\t TURNAROUND TIME\n"); 2

for(i=0;i<n;i++)

printf("\n\t P%d \t\t %d \t\t %d \t\t %d", i, bt[i], wt[i], tat[i]); printf("\nAverage Waiting Time -- %f", wtavg/n); printf("\nAverage Turnaround Time -- %f", tatavg/n); getch();

}

Sample Input:

Enter the number of processes -- 3

Enter Burst Time for Process 0 -- 24

Enter Burst Time for Process 1 -- 3

Enter Burst Time for Process 2 -- 3

Sample Output:

PROCESS BURST TIME WAITING TIME TURNAROUND TIME

P0 24 0 24

P1 3 24 27

P2 3 27 30

Average Waiting Time-- 17.000000

Average Turnaround Time -- 27.000000

Input:

Enter the number of processes :4

Enter Burst Time for Process 0 :23

Enter Burst Time for Process 1:24

Enter Burst Time for Process 2 :12

Enter Burst Time For Process 3:89

Output:

PROCESS BURST TIME WAITING TIME TURNAROUND TIME

P0 23 0 23

P1 24 23 47

P2 12 47 54

P3 89 54 148

Average Waiting Time-- 32.250000

Average Turnaround Time -- 69.250000

d) PRIORITY CPU Scheduling Algorithm data:02\02\2019

Description :For priority scheduling algorithm, read the number of processes/jobs in the system, their CPU burst times, and the priorities. Arrange all the jobs in order with respect to their priorities. There may be two jobs in queue with the same priority, and then FCFS approach is to be performed. Each process will be executed according to its priority. Calculate the waiting time and turnaround time of each of the processes accordingly.

Program:

#include<stdio.h>

main()

{

int p[20],bt[20],pri[20], wt[20],tat[20],i, k, n, temp;

float wtavg, tatavg;

clrscr();

printf("Enter the number of processes --- ");

scanf("%d",&n);

for(i=0;i<n;i++)

{

p[i] = i;

printf("Enter the Burst Time & Priority of Process %d --- ",i); scanf("%d %d",&bt[i], &pri[i]);

}

for(i=0;i<n;i++)

for(k=i+1;k<n;k++)

if(pri[i] > pri[k])

{

temp=p[i];

p[i]=p[k];

p[k]=temp;

temp=bt[i];

bt[i]=bt[k];

bt[k]=temp;

temp=pri[i];

pri[i]=pri[k];

pri[k]=temp;

}

wtavg = wt[0] = 0;

tatavg = tat[0] = bt[0];

for(i=1;i<n;i++)

{

wt[i] = wt[i-1] + bt[i-1];

tat[i] = tat[i-1] + bt[i];

wtavg = wtavg + wt[i];

tatavg = tatavg + tat[i];

}

printf("\nPROCESS\t\tPRIORITY\tBURST TIME\tWAITING TIME\tTURNAROUND TIME"); for(i=0;i<n;i++)

printf("\n%d \t\t %d \t\t %d \t\t %d \t\t %d ",p[i],pri[i],bt[i],wt[i],tat[i]);

printf("\nAverage Waiting Time is --- %f",wtavg/n);

printf("\nAverage Turnaround Time is --- %f",tatavg/n);

getch();

}

Sample Input:

Enter the number of processes -- 5

Enter the Burst Time & Priority of Process 0 --- 10 3

Enter the Burst Time & Priority of Process 1 --- 1 1

Enter the Burst Time & Priority of Process 2 --- 2 4

Enter the Burst Time & Priority of Process 3 --- 1 5

Enter the Burst Time & Priority of Process 4 --- 5 2

Sample Output:

PROCESS PRIORITY BURST TIME WAITING TIME TURNAROUND TIME

1 1 1 0 1

4 2 5 1 6

0 3 10 6 16

2 4 2 16 18

3 5 1 18 19

Average Waiting Time is --- 8.200000

Average Turnaround Time is --- 12.000000

Input:

Enter the number of processes -- 4

Enter the Burst Time & Priority of Process 0 --- 23 2

Enter the Burst Time & Priority of Process 1 --- 24 1

Enter the Burst Time & Priority of Process 2 --- 12 32

Enter the Burst Time & Priority of Process 3 --- 89 5

Output:

PROCESS PRIORITY BURST TIME WAITING TIME TURNAROUND TIME

1 1 24 0 24

0 2 23 24 47

3 5 89 47 136

2 32 12 136 148

Average Waiting Time is --- 57.750000

Average Turnaround Time is --- 88.750000

Result:

Week: 2

File Allocation Strategies

Date:9-2-19

AIM:ToSimulate all file allocation strategies

a) Sequential b) Indexed c) Linked

Description : A file is a collection of data, usually stored on disk. As a logical entity, a file enables to divide data into meaningful groups. As a physical entity, a file should be considered in terms of its organization. The term "file organization" refers to the way in which data is stored in a file and, consequently, the method(s) by which it can be accessed.

a) Sequential File Allocation:

Description:

In this file organization, the records of the file are stored one after another both physically and logically. That is, record with sequence number 16 is located just after the 15th record. A record of a sequential file can only be accessed by reading all the previous records.

Program:

#include<stdio.h>

#include<stdlib.h>

#include<conio.h>

struct fileTable

{

char name[20];

int sb, nob;

}ft[30];

void main()

{

int i, j, n;

char s[20];

clrscr();

printf("Enter no of files :");

scanf("%d",&n);

for(i=0;i<n;i++)

{

printf("\nEnter file name %d :",i+1);

scanf("%s",ft[i].name);

printf("Enter starting block of file %d:",i+1);

scanf("%d",&ft[i].sb);

printf("Enter no of blocks in file %d :",i+1);

scanf("%d",&ft[i].nob);

}

printf("\nEnter the file name to be searched -- ");

scanf("%s",s);

for(i=0;i<n;i++)

if(strcmp(s, ft[i].name)==0)

break;

if(i==n)

printf("\nFile Not Found");

else

{

printf("\nFILE NAME START BLOCK NO OF BLOCKS BLOCKS OCCUPIED\n"); printf(" \n%s\t\t%d\t\t%d\t",ft[i].name,ft[i].sb,ft[i].nob); for(j=0;j<ft[i].nob;j++)

printf("%d, ",ft[i].sb+j);

}

getch();

}

Input:

Enter no of files :2

Enter file name 1 :sai

Enter starting block of file 1 :20

Enter no of blocks in file 1 :10

Enter file name 2 :Bunty

Enter starting block of file 2 :146

Enter no of blocks in file 2 :20

Enter the file name to be searched -- sai

Output:

FILE NAME START BLOCK NO OF BLOCKS BLOCKS OCCUPIED

Sai 20 10 20,21,22,23,24,25,26,27,28

Result:

b) Indexed File Allocation: 16-2-19

Description:

Indexed file allocation strategy brings all the pointers together into one location: an index block. Each file has its own index block, which is an array of disk-block addresses. The i^th entry in the index block points to the i^th block of the file. The directory contains the address of the index block. To find and read the i^th block, the pointer in the i^th index-block entry is used.

Program:

#include<stdio.h>

#include<conio.h>

struct fileTable

{

char name[20];

int nob, blocks[30];

}ft[30];

void main()

{

int i, j, n;

char s[20];

clrscr();

printf("Enter no of files :");

scanf("%d",&n);

for(i=0;i<n;i++)

{

printf("\nEnter file name %d :",i+1);

scanf("%s",ft[i].name);

printf("Enter no of blocks in file %d :",i+1);

scanf("%d",&ft[i].nob);

printf("Enter the blocks of the file :");

for(j=0;j<ft[i].nob;j++)

scanf("%d",&ft[i].blocks[j]);

}

printf("\nEnter the file name to be searched -- ");

scanf("%s",s);

for(i=0;i<n;i++)

if(strcmp(s, ft[i].name)==0)

break;

if(i==n)

printf("\nFile Not Found");

else

{

printf("\nFILE NAME NO OF BLOCKS BLOCKS OCCUPIED");

printf("\n %s\t\t%d\t",ft[i].name,ft[i].nob); for(j=0;j<ft[i].nob;j++)

printf("%d, ",ft[i].blocks[j]);

}

getch();

}

Sample Input:

Enter no of files : 2

Enter file 1 : sai

Enter no of blocks in file 1 : 3

Enter the blocks of the file 1 : 65

Enter file 2 : bunty

Enter no of blocks in file 2 : 4

Enter the blocks of the file 2 : 4

Enter the file to be searched :sai

Sample Output:

FILE NAME NO OF BLOCKS BLOCKS OCCUPIED

sai 3 65,

10,

Result:

c) Linked File Allocation: 16-2-19

Description:

With linked allocation, each file is a linked list of disk blocks; the disk blocks may be scattered anywhere on the disk. The directory contains a pointer to the first and last blocks of the file. Each block contains a pointer to the next block.

Program:

#include<stdio.h>

#include<string.h>

#include<stdlib.h>

#include<conio.h>

struct fileTable

{

char name[20];

int nob;

struct block *sb;

}ft[30];

struct block

{

int bno;

struct block *next;

};

void main()

{

int i, j, n;

char s[20];

struct block *temp;

clrscr();

printf("Enter no of files:");

scanf("%d",&n);

for(i=0;i<n;i++)

{

printf("\nEnter file name %d:",i+1);

scanf("%s",ft[i].name);

printf("Enter no of blocks in file %d :",i+1);

scanf("%d",&ft[i].nob);

ft[i].sb=(struct block*)malloc(sizeof(struct block));

temp = ft[i].sb;

printf("Enter the blocks of the file :");

scanf("%d",&temp->bno);

temp->next=NULL;

for(j=1;j<ft[i].nob;j++)

{

temp->next = (struct block*)malloc(sizeof(struct block));

temp = temp->next;

scanf("%d",&temp->bno);

}

temp->next = NULL;

}

printf("\nEnter the file name to be searched -- ");

scanf("%s",s);

for(i=0;i<n;i++)

if(strcmp(s, ft[i].name)==0)

break;

If(i==n)

printf("\nFile Not Found");

else

{

printf("\nFILE NAME NO OF BLOCKS BLOCKS OCCUPIED");

printf("\n %s\t\t%d\t",ft[i].name,ft[i].nob);

temp=ft[i].sb;

for(j=0;j<ft[i].nob;j++)

{

printf("%d  ",temp->bno);

temp = temp->next;

}

getch();

}

Input:

Enter no of files : 2

Enter file 1 : sai

Enter no of blocks in file 1 : 3

Enter the blocks of the file 1 : 2

Enter file 2 :bunty

Enter no of blocks in file 2 :3

Enter the blocks of the file 2 : 2

Enter the file to be searched : sai

Output:

FILE NAME NO OF BLOCKS BLOCKS OCCUPIED

sai 3 2 ? 3 ? 6?

Result:

Week: 3

Memory Management techniques

Date:

AIM:To Simulate MVT and MFT memory management techniques

MFT:

Description: MFT (Multiprogramming with a Fixed number of Tasks) is one of the old memory management techniques in which the memory is partitioned into fixed size partitions and each job is assigned to a partition. The memory assigned to a partition does not change.

Program:

#include<stdio.h>

#include<conio.h>

main()

{

int ms, bs, nob, ef,n, mp[10],tif=0;

int i,p=0;

clrscr();

printf("Enter the total memory available (in Bytes) -- ");

scanf("%d",&ms);

printf("Enter the block size (in Bytes) -- ");

scanf("%d", &bs);

nob=ms/bs;

ef=ms - nob*bs;

printf("\nEnter the number of processes -- ");

scanf("%d",&n);

for(i=0;i<n;i++)

{

printf("Enter memory required for process %d (in Bytes)-- ",i+1); scanf("%d",&mp[i]);

}

printf("\nNo. of Blocks available in memory -- %d",nob);

printf("\n\nPROCESS\tMEMORY REQUIRED\t ALLOCATED\tINTERNAL FRAGMENTATION");

for(i=0;i<n && p<nob;i++)

{

printf("\n %d\t\t%d",i+1,mp[i]);

if(mp[i] > bs)

printf("\t\tNO\t\t---");

else

{

printf("\t\tYES\t%d",bs-mp[i]);

tif = tif + bs-mp[i];

p++;

}

if(i<n)

printf("\nMemory is Full, Remaining Processes cannot be accomodated");

printf("\n\nTotal Internal Fragmentation is %d",tif);

printf("\nTotal External Fragmentation is %d",ef);

getch();

}

Sample Input:

Enter the total memory available (in Bytes) -- 1000

Enter the block size (in Bytes) -- 300

Enter the number of processes – 5

Enter memory required for process 1 (in Bytes) -- 275

Enter memory required for process 2 (in Bytes) -- 400

Enter memory required for process 3 (in Bytes) -- 290

Enter memory required for process 4 (in Bytes) -- 293

Enter memory required for process 5 (in Bytes) -- 100

No. of Blocks available in memory -- 3

Sample Output:

PROCESS MEMORY REQUIRED ALLOCATED INTERNAL FRAGMENTATION

1 275 YES 25

2 400 NO -----

3 290 YES 10

4 293 YES 7

Memory is Full, Remaining Processes cannot be accommodated

Total Internal Fragmentation is 42

Total External Fragmentation is 100

Input :

Output:

Result:

MVT:

Description: MVT (Multiprogramming with a Variable number of Tasks) is the memory management technique in which each job gets just the amount of memory it needs. That is, the partitioning of memory is dynamic and changes as jobs enter and leave the system. MVT is a more ``efficient'' user of resources. MFT suffers with the problem of internal fragmentation and MVT suffers with external fragmentation.

Program:

#include<stdio.h>

#include<conio.h>

main()

{

int ms,mp[10],i, temp,n=0;

char ch = 'y';

clrscr();

printf("\nEnter the total memory available (in Bytes)-- ");

scanf("%d",&ms);

temp=ms;

for(i=0;ch=='y';i++,n++)

{

printf("\nEnter memory required for process %d (in Bytes) -- ",i+1);

scanf("%d",&mp[i]);

if(mp[i]<=temp)

{

printf("\nMemory is allocated for Process %d ",i+1);

temp = temp - mp[i];

}

else

{

printf("\nMemory is Full");

break;

}

printf("\nDo you want to continue(y/n) -- ");

scanf(" %c", &ch);

}

printf("\n\nTotal Memory Available -- %d", ms);

printf("\n\n\tPROCESS\t\t MEMORY ALLOCATED ");

for(i=0;i<n;i++)

printf("\n \t%d\t\t%d",i+1,mp[i]);

printf("\n\nTotal Memory Allocated is %d",ms-temp);

printf("\nTotal External Fragmentation is %d",temp);

getch();

}

Sample Input:

Enter the total memory available (in Bytes) -- 1000

Enter memory required for process 1 (in Bytes) -- 400

Memory is allocated for Process 1

Do you want to continue(y/n) -- y

Enter memory required for process 2 (in Bytes) -- 275

Memory is allocated for Process 2

Do you want to continue(y/n) -- y

Enter memory required for process 3 (in Bytes) -- 550

Sample Output:

Memory is Full

Total Memory Available -- 1000

PROCESS MEMORY ALLOCATED

1 400

2 275

Total Memory Allocated is 675

Total External Fragmentation is325

Input\out put

Output:

Result:

Week: 4

File Organization Techniques

Date:

AIM:To Simulate all File Organization Techniques

a) Single level directory b) Two level c) Hierarchical d) DAG

DESCRIPTION

The directory structure is the organization of files into a hierarchy of folders. In a single-level directory system, all the files are placed in one directory. There is a root directory which has all files. It has a simple architecture and there are no sub directories. Advantage of single level directory system is that it is easy to find a file in the directory. In the two-level directory system, each user has own user file directory (UFD). The system maintains a master block that has one entry for each user. This master block contains the addresses of the directory of the users. When a user job starts or a user logs in, the system's master file directory (MFD) is searched. When a user refers to a particular file, only his own UFD is searched. This effectively solves the name collision problem and isolates users from one another. Hierarchical directory structure allows users to create their own subdirectories and to organize their files accordingly. A tree is the most common directory structure. The tree has a root directory, and every file in the system has a unique path name. A directory (or subdirectory) contains a set of files or subdirectories.

SINGLE LEVEL DIRECTORY ORGANIZATION

Program:

#include<stdio.h>

struct

{

char dname[10],fname[10][10];

int fcnt;

}dir;

void main()

{

int i,ch;

char f[30];

clrscr();

dir.fcnt = 0;

printf("\nEnter name of directory -- ");

scanf("%s", dir.dname);

while(1)

{

printf("\n\n1. Create File\t2. Delete File\t3. Search File \n4. Display Files\t5. Exit\nEnter your choice -- ");

scanf("%d",&ch);

switch(ch)

{

case 1: printf("\nEnter the name of the file -- ");

scanf("%s",dir.fname[dir.fcnt]);

dir.fcnt++;

break;

case 2: printf("\nEnter the name of the file -- ");

scanf("%s",f);

for(i=0;i<dir.fcnt;i++)

{

if(strcmp(f, dir.fname[i])==0)

{

printf("File %s is deleted ",f);

strcpy(dir.fname[i],dir.fname[dir.fcnt-1]);

break;

}

if(i==dir.fcnt)

printf("File %s not found",f);

else

dir.fcnt--;

break;

case 3: printf("\nEnter the name of the file -- ");

scanf("%s",f);

for(i=0;i<dir.fcnt;i++)

{

if(strcmp(f, dir.fname[i])==0)

{

printf("File %s is found ", f);

break;

}

if(i==dir.fcnt)

printf("File %s not found",f);

break;

case 4: if(dir.fcnt==0)

printf("\nDirectory Empty");

else

{

printf("\nThe Files are -- ");

for(i=0;i<dir.fcnt;i++)

printf("\t%s",dir.fname[i]);

}

break;

default: exit(0);

}

getch();

}

Output:

Enter name of directory -- CSE

1. Create File 2. Delete File 3. Search File

4. Display Files 5. Exit Enter your choice – 1

Enter the name of the file -- A

1. Create File 2. Delete File 3. Search File

4. Display Files 5. Exit Enter your choice – 1

Enter the name of the file -- B

1. Create File 2. Delete File 3. Search File

4. Display Files 5. Exit Enter your choice – 1

Enter the name of the file -- C

1. Create File 2. Delete File 3. Search File

4. Display Files 5. Exit Enter your choice – 4

The Files are -- A B C

1. Create File 2. Delete File 3. Search File

4. Display Files 5. Exit Enter your choice – 3

Enter the name of the file – ABC

File ABC not found

1. Create File 2. Delete File 3. Search File

4. Display Files 5. Exit Enter your choice – 2

Enter the name of the file – B

File B is deleted

1. Create File 2. Delete File 3. Search File

4. Display Files 5. Exit Enter your choice – 5

Result:

TWO LEVEL DIRECTORY ORGANIZATION

Program:

#include<stdio.h>

struct

{

char dname[10],fname[10][10];

int fcnt;

}dir[10];

void main()

{

int i,ch,dcnt,k;

char f[30], d[30];

clrscr();

dcnt=0;

while(1)

{

printf("\n\n1. Create Directory\t2. Create File\t3. Delete File"); printf("\n4. Search File\t\t5. Display\t6. Exit\t

Enter your choice -- ");

scanf("%d",&ch);

switch(ch)

{

case 1: printf("\nEnter name of directory -- ");

scanf("%s", dir[dcnt].dname);

dir[dcnt].fcnt=0;

dcnt++;

printf("Directory created");

break;

case 2: printf("\nEnter name of the directory -- ");

scanf("%s",d);

for(i=0;i<dcnt;i++)

if(strcmp(d,dir[i].dname)==0)

{

printf("Enter name of the file -- ");

scanf("%s",dir[i].fname[dir[i].fcnt]);

dir[i].fcnt++;

printf("File created");

break;

}

if(i==dcnt)

printf("Directory %s not found",d);

break;

case 3: printf("\nEnter name of the directory -- ");

scanf("%s",d);

for(i=0;i<dcnt;i++)

{

if(strcmp(d,dir[i].dname)==0)

{

printf("Enter name of the file -- ");

scanf("%s",f);

for(k=0;k<dir[i].fcnt;k++)

{

if(strcmp(f, dir[i].fname[k])==0)

{

printf("File %s is deleted ",f);

dir[i].fcnt--;

strcpy(dir[i].fname[k],dir[i].fname[dir[i].fcnt]);

goto jmp;

}

printf("File %s not found",f);

goto jmp;

}

printf("Directory %s not found",d);

jmp : break;

case 4: printf("\nEnter name of the directory -- ");

scanf("%s",d);

for(i=0;i<dcnt;i++)

{

if(strcmp(d,dir[i].dname)==0)

{

printf("Enter the name of the file -- ");

scanf("%s",f);

for(k=0;k<dir[i].fcnt;k++)

{

if(strcmp(f, dir[i].fname[k])==0)

{

printf("File %s is found ",f);

goto jmp1;

}

printf("File %s not found",f);

goto jmp1;

}

printf("Directory %s not found",d);

jmp1: break;

case 5: if(dcnt==0)

printf("\nNo Directory's ");

else

{

printf("\nDirectory\tFiles");

for(i=0;i<dcnt;i++)

{

printf("\n%s\t\t",dir[i].dname);

for(k=0;k<dir[i].fcnt;k++)

printf("\t%s",dir[i].fname[k]);

}

break;

default:exit(0);

}

getch();

}

Output: