Hi, I have a project for my Tratamiento de la Información course (roughly "Information Processing" in English). The task is to code Naive Bayes and/or K-Nearest Neighbor from scratch in any IDE, without using extensions or libraries. I managed to code both algorithms in Python, but I have to run them on the datasets proposed by our professor, and those are BIG datasets: my code crashes because of how much data they contain. Can someone kindly help me adapt the code I have so it works with that large amount of data?
Here are the codes:
1) Naive-bayes code
2) K-nearest neighbor code
The attached file is the dataset for both codes (a sketch of how it could be loaded is right below).
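The snippets that follow use a dataset variable that is never defined in the post, so here is a minimal sketch of how it could be filled from the attached file using only built-ins; the file name "dataset.csv" and the comma delimiter are placeholders, not necessarily the professor's actual format:
Python:
# Minimal sketch: read a delimited text file into a list of numeric rows.
# "dataset.csv" is a placeholder; change the path/delimiter to match the real file.
dataset = list()
with open("dataset.csv") as f:
    for line in f:
        line = line.strip()
        if not line:
            continue  # skip blank lines
        # every field becomes a float; the class label stays in the last column
        dataset.append([float(value) for value in line.split(",")])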
1) Naive-bayes code
Python:
# Group the training rows by class label (last column) so the
# per-class counts can be reused later.
mp = dict()
for i in range(len(dataset)):
    row = dataset[i]
    y = row[-1]
    if y not in mp:
        mp[y] = list()
    mp[y].append(row)

for label in mp:
    print(label)
    for row in mp[label]:
        print(row)
Python:
test = [
[2,0,0,2,14,9,0,2,2,0,2,3,0,0,2],
[1,37.17,4,1,7,4,5,1,0,0,1,0,280,1,0],
[1,16.17,0.04,1,7,3,0.04,0,0,0,0,1,0,1,1],
[1,51.83,2.04,0,0,0,1.5,0,0,0,0,1,120,2,0],
[1,29.25,13,1,1,7,0.5,0,0,0,0,1,228,1,0],
[1,31.08,3.085,1,7,3,2.5,0,1,2,1,1,160,42,0],
[1,37.75,7,1,10,7,11.5,1,1,7,1,1,300,6,0],
[1,23.5,2.75,1,0,0,4.5,0,0,0,0,1,160,26,0],
[0,22.92,1.25,1,10,3,0.25,0,0,0,1,1,120,810,0],
[1,28.17,0.125,0,3,3,0.085,0,0,0,0,1,216,2101,0],
[1,42.75,4.085,1,5,3,0.04,0,0,0,0,1,108,101,0],
[1,48.58,6.5,1,10,7,6,1,0,0,1,1,350,1,1],
[1,39.92,5,1,2,4,0.21,0,0,0,0,1,550,1,0],
[1,31.83,0.04,0,6,3,0.04,0,0,0,0,1,0,1,0],
[1,34.17,1.54,1,12,3,1.54,1,1,1,1,1,520,50001,1],
[1,33.58,2.75,1,6,3,4.25,1,1,6,0,1,204,1,1],
[0,16.92,0.5,1,2,3,0.165,0,1,6,1,1,240,36,0],
[0,22.42,11.25,0,13,7,0.75,1,1,4,0,1,0,322,1],
[1,18.42,10.415,0,5,3,0.125,1,0,0,0,1,120,376,0],
[1,34.08,6.5,1,5,3,0.125,1,0,0,1,1,443,1,0],
[1,39.17,2.5,0,2,7,10,0,0,0,1,0,200,1,0],
[1,29.42,1.25,1,8,3,1.75,0,0,0,0,1,200,1,0],
[1,20,0,1,1,3,0.5,0,0,0,0,1,144,1,0],
[0,71.58,0,1,7,3,0,0,0,0,0,2,184,1,1],
[1,18.83,0.415,0,7,3,0.165,0,1,1,0,1,200,2,0],
[1,43.08,0.375,0,7,3,0.375,1,1,8,1,1,300,163,1],
[1,18.83,3.54,0,0,0,0,0,0,0,1,1,180,2,0],
[0,21.75,1.75,0,4,2,0,0,0,0,0,1,160,1,0],
[1,27.58,3.25,0,10,7,5.085,0,1,2,1,1,369,2,0],
[1,29.42,1.25,1,7,7,0.25,0,1,2,1,1,400,109,0],
[1,28.25,5.04,0,7,4,1.5,1,1,8,1,1,144,8,1],
[1,31.57,0.375,1,1,3,0.875,1,0,0,1,0,928,1,0],
[1,47.67,2.5,1,6,4,2.5,1,1,12,1,1,410,2511,1],
[1,22.67,0.75,1,2,3,1.585,0,1,1,1,1,400,10,0],
[0,24.83,4.5,1,8,3,1,0,0,0,1,1,360,7,0],
[1,39.83,0.5,1,6,3,0.25,1,0,0,0,0,288,1,0],
[1,49.5,7.585,1,2,4,7.585,1,1,15,1,1,0,5001,1],
[1,39.17,1.625,1,7,3,1.5,1,1,10,0,1,186,4701,1],
[1,34.67,1.08,1,6,3,1.165,0,0,0,0,0,28,1,0],
[1,23.58,0.46,0,8,3,2.625,1,1,6,1,1,208,348,0],
[2,0,0,2,14,9,0,2,2,0,2,3,0,0,2],
[0,64.08,0.165,1,0,0,0,1,1,1,0,1,232,101,1],
[1,42,9.79,1,13,7,7.96,1,1,8,0,1,0,1,1],
[1,27.42,12.5,1,5,4,0.25,0,0,0,1,1,720,1,0],
[1,16.5,0.125,1,7,3,0.165,0,0,0,0,1,132,1,0],
[1,38.17,10.125,1,13,3,2.5,1,1,6,0,1,520,197,1],
[1,21.08,4.125,0,2,7,0.04,0,0,0,0,1,140,101,0],
[1,33.67,1.25,1,8,3,1.165,0,0,0,0,1,120,1,0],
[0,28.17,0.585,1,5,3,0.04,0,0,0,0,1,260,1005,0],
[1,20.67,0.835,0,7,3,2,0,0,0,1,0,240,1,0],
[1,54.33,6.75,1,7,7,2.625,1,1,11,1,1,0,285,1],
[0,17.67,0,0,4,0,0,0,0,0,0,1,86,1,0],
[1,34,5.5,0,7,3,1.5,0,0,0,1,1,60,1,0],
[1,29.83,3.5,1,7,3,0.165,0,0,0,0,1,216,1,0],
[1,40.92,2.25,0,13,7,10,1,0,0,1,1,176,1,0],
[1,25.67,12.5,1,12,3,1.21,1,1,67,1,1,140,259,1],
[0,24.75,13.665,1,10,7,1.5,0,0,0,0,1,280,2,0],
[1,34,4.5,1,5,3,1,1,0,0,1,1,240,1,0],
[1,48.5,4.25,1,6,3,0.125,1,0,0,1,1,225,1,1],
[1,33.17,3.04,0,7,7,2.04,1,1,1,1,1,180,18028,1],
[1,28.25,5.125,1,13,3,4.75,1,1,2,0,1,420,8,1],
[0,52.5,7,1,5,7,3,0,0,0,0,1,0,1,0],
[0,19.17,0.585,0,5,3,0.585,1,0,0,1,1,160,1,0],
[1,21,4.79,0,8,3,2.25,1,1,1,1,1,80,301,1],
[1,21.17,0.25,0,7,7,0.25,0,0,0,0,1,280,205,0],
[1,20.42,1.085,1,10,3,1.5,0,0,0,0,1,108,8,0],
[1,27.58,2.04,0,5,3,2,1,1,3,1,1,370,561,1],
[1,38.67,0.21,1,3,3,0.085,1,0,0,1,1,280,1,1],
[1,30.58,2.71,0,6,3,0.125,0,0,0,1,0,80,1,0],
[1,43.17,5,1,2,4,2.25,0,0,0,1,1,141,1,0],
[1,18.58,10.29,1,0,0,0.415,0,0,0,0,1,80,1,0]
]
Python:
probYes = 1
count = 0
total = 0
# P(yes): fraction of training rows whose label (last column) is 1
for row in dataset:
    if row[-1] == 1:
        count += 1
    total += 1
print("Total yes: " + str(count) + " / " + str(total))
probYes *= count/total
# For each index i, count how many class-1 rows match test[i] in column i
for i in range(len(test)):
    count = 0
    total = 0
    for row in mp[1]:
        if test[i] == row[i]:
            count += 1
        total += 1
    print('For feature ' + str(i+1))
    print(str(count) + " / " + str(total))
    probYes *= count/total
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
probNo = 1
count = 0
total = 0
# P(no): fraction of training rows whose label is 0
for row in dataset:
    if row[-1] == 0:
        count += 1
    total += 1
probNo *= count/total
print("Total no: " + str(count) + " / " + str(total))
# Same per-column matching, but against the class-0 rows
for i in range(len(test)):
    count = 0
    total = 0
    for row in mp[0]:
        if test[i] == row[i]:
            count += 1
        total += 1
    print('For feature ' + str(i+1))
    print(str(count) + " / " + str(total))
    probNo *= count/total
Python:
print(probYes)
print(probNo)
Python:
prob = probYes/(probYes+probNo)
print("La probabilidad es: "+str(prob*100)+"%")
2) K-nearest neighbor code
Python:
import matplotlib.pyplot as plt

# Visualise the first two columns of the dataset, coloured by the third column
x = [i[0] for i in dataset]
y = [i[1] for i in dataset]
label = [i[2] for i in dataset]
plt.scatter(x, y, c=label)
plt.show()
Python:
import math

# Euclidean distance over all columns except the last one (the class label)
def dist(testRow, trainRow):
    d = 0.0
    for i in range(0, len(trainRow) - 1):
        d += (testRow[i] - trainRow[i])**2
    return math.sqrt(d)
Python:
print("Introduzca el punto que desea clasificar")
test = [int(i) for i in input().split()]
print("Introduzca a k")
k = int(input())
Python:
plt.scatter(x,y,c=label)
plt.scatter(test[0],test[1],c='red')
plt.show()
Python:
# Distance from the test point to every training row, sorted ascending
d = list()
for row in dataset:
    temp = dist(test, row)
    d.append((temp, row))
d.sort(key=lambda x: x[0])

# Keep the k closest rows
knn = list()
print("K nearest neighbors")
for i in range(k):
    print("point: (" + str(d[i][1][0]) + ", " + str(d[i][1][1]) + ") with distance: " + str(d[i][0]) + " and class: " + str(d[i][1][-1]))
    knn.append(d[i][1])
Python:
# Majority vote over the class labels of the k nearest neighbors
labels = [row[-1] for row in knn]
pred = max(set(labels), key=labels.count)
print('prediction: ' + str(pred))
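Typing points by hand will not scale to the professor's files, so here is a sketch of running the same distance-and-vote logic over a whole list of rows; knn_predict and rows_to_classify are placeholder names, not part of the original code:
Python:
# Hypothetical wrapper around the dist() function defined above: predict the
# class of one row by majority vote among its k nearest training rows.
def knn_predict(dataset, row_to_classify, k):
    distances = [(dist(row_to_classify, row), row) for row in dataset]
    distances.sort(key=lambda pair: pair[0])
    neighbour_labels = [row[-1] for _, row in distances[:k]]
    return max(set(neighbour_labels), key=neighbour_labels.count)

# rows_to_classify stands in for whatever rows need labels (for example a test
# file loaded the same way as dataset); the first 10 training rows serve as a demo
rows_to_classify = dataset[:10]
for row in rows_to_classify:
    print(row, "->", knn_predict(dataset, row, 5))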