From da8ca975fb9d11d3801fef66344736e675734c42 Mon Sep 17 00:00:00 2001
From: Mike Vink <mike1994vink@gmail.com>
Date: Wed, 23 Sep 2020 08:45:49 +0200
Subject: Deleting some misc files

---
 gettingStarted.py | 100 ------------------------------------------------------
 message           |  10 ------
 recursive_node.py |  58 -------------------------------
 3 files changed, 168 deletions(-)
 delete mode 100644 gettingStarted.py
 delete mode 100644 message
 delete mode 100644 recursive_node.py

diff --git a/gettingStarted.py b/gettingStarted.py
deleted file mode 100644
index 3a8d907..0000000
--- a/gettingStarted.py
+++ /dev/null
@@ -1,100 +0,0 @@
-import numpy as np
-import random
-import math
-from copy import deepcopy
-
-credit_data = np.genfromtxt('/Users/mikevink/Documents/python/2020_data_mining_assignments/credit_score.txt', delimiter=',', skip_header=True)
-
-#print(credit_data)
-#print(credit_data[0])
-#print(credit_data[:,3])
-#print(credit_data[4,0])
-#print(np.sort(np.unique(credit_data[:,3]))) #Give the distinct values of income, sorted from low to high
-#print(np.sum(credit_data[:,5]))
-#print(credit_data.sum(axis=0)) #Add the entries of each column of credit_data
-#print(credit_data.sum(axis=1)) #Add the entries of each row
-#print(credit_data[credit_data[:,0] > 27]) # Select all rows where the first column is bigger than 27
-#
-#x = np.array([2, 5, 10])
-#print(x)
-#print(np.arange(0, 10))
-#
-#print(np.arange(0, 10)[credit_data[:,0] > 27]) #Select the *row numbers* of the rows where the first column of credit_data is bigger than 27
-#
-#index = np.random.choice(np.arange(0, 10), size=5, replace=False) #Draw a random sample of size 5 from the numbers 1 through 10 (without replacement)
-#print(index)
-#train = credit_data[index,]
-#print(train)
-#test = np.delete(credit_data, index, axis=0) #Select all rows with row number not in "index"
-#print(test)
-#
-#print(random.choice(train))
-
-
-### Practice exercise 1 ###
-def impurity(vector): # vector = list of 0s and 1s
-    num_of_class_labels = len(vector)
-    num_of_class_1 = sum(vector)
-    num_of_class_0 = num_of_class_labels - num_of_class_1
-    return (num_of_class_0 / num_of_class_labels) * (num_of_class_1 / num_of_class_labels)
-
-array=np.array([1,0,1,1,1,0,0,1,1,0,1])
-print(impurity(array))
-
-
-### Practice exercise 2 ###
-def bestsplit(x, y): # x = numeric values; y = class labels
-    x_sorted = np.sort(np.unique(x))
-    split_points = (x_sorted[:len(x_sorted)-1] + x_sorted[1:]) / 2
-    
-    best_impurity_after_split = math.inf
-    for split in split_points:
-        impurity_after_split = impurity(y[x <= split]) + impurity(y[x > split])
-        if impurity_after_split < best_impurity_after_split:
-            best_split = split
-            best_impurity_after_split = impurity_after_split
-
-    return best_split
-
-print(bestsplit(credit_data[:,3], credit_data[:,5]))
-
-
-
-class Node:
-    def _init_(self):
-        self.left  = None
-        self.right = None
-        self.split_value = None
-        
-class Leaf:
-    def __init__(self, predicted_class: int):
-        self.predicted_class = predicted_class
-
-
-def tree_grow(x, y): # x = numeric values; y = class labels
-    root = Node()
-    root.split_value = bestsplit(x, y)
-    root.left = Leaf(0)
-    root.right = Leaf(1)
-    return root
-    
-def tree_pred(x, tr):
-    y = []
-    for value in x:
-        y.append(single_value_pred(value, tr))  
-    return y
-
-def single_value_pred(value, current_tree):
-    if isinstance(current_tree, Leaf):
-        return current_tree.predicted_class
-    else:
-        if value <= current_tree.split_value:
-            return single_value_pred(value, current_tree.left)
-        else:
-            return single_value_pred(value, current_tree.right)        
-
-tree = tree_grow(credit_data[:,3], credit_data[:,5])
-print(tree_pred([32, 38, 3, 40], tree))
-
-
-
diff --git a/message b/message
deleted file mode 100644
index bc70ce8..0000000
--- a/message
+++ /dev/null
@@ -1,10 +0,0 @@
-heb twee punten:
-
-1. Impurity_after_split (line 52) is som over child impurities, in plaats
-daarvan moet het gewogen gemiddelde tot de len(new_y) / len(old_y) zijn denk ik
-(misschien? want het output hetzelfde als het voorbeeld).
-
-2. Ik denk dat je op deze manier steeds een sub-tree/ een node-recursie aan
-single_value_pred geeft. Wat is de beste manier om een node uit de tree aan te
-spreken?
-
diff --git a/recursive_node.py b/recursive_node.py
deleted file mode 100644
index 22bc75d..0000000
--- a/recursive_node.py
+++ /dev/null
@@ -1,58 +0,0 @@
-import numpy as np
-
-
-class Node():
-    """
-    @todo: docstring for Node
-    """
-    def __init__(self, value=None):
-        """@todo: Docstring for init method.
-
-        /value=None/ @todo
-
-        """
-        self.value = value
-
-    def add_split(self, left, right):
-        """
-        @todo: Docstring for add_split
-        """
-        self.left = left
-        self.right = right
-
-
-class Tree():
-    """
-    @todo: docstring for Tree
-    """
-    def __init__(self, root_node_obj):
-        """@todo: Docstring for init method.
-
-        /root_node_obj/ @todo
-
-        """
-        self.tree = root_node_obj
-
-    def __repr__(self):
-        nodelist = [self.tree]
-        tree_str = ''
-        while nodelist:
-            current_node = nodelist.pop()
-            # print(current_node.value)
-            try:
-                childs = [current_node.right, current_node.left]
-                nodelist += childs
-            except AttributeError:
-                pass
-            tree_str += current_node.value
-        return tree_str
-
-
-n1 = Node(value="root\n")
-n2 = Node(value="left child of n1, ")
-n3 = Node(value="right child of n1")
-
-n1.add_split(n2, n3)
-
-my_tree = Tree(n1)
-print(my_tree)
-- 
cgit v1.2.3