summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ass1_vectorized.py369
-rw-r--r--assignment1.py274
2 files changed, 516 insertions, 127 deletions
diff --git a/ass1_vectorized.py b/ass1_vectorized.py
new file mode 100644
index 0000000..08a4f8e
--- /dev/null
+++ b/ass1_vectorized.py
@@ -0,0 +1,369 @@
+import time
+import numpy as np
+
+credit_data = np.genfromtxt('./credit_score.txt',
+ delimiter=',',
+ skip_header=True)
+
+# age,married,house,income,gender,class
+# [(22, 0, 0, 28, 1, 0)
+# (46, 0, 1, 32, 0, 0)
+# (24, 1, 1, 24, 1, 0)
+# (25, 0, 0, 27, 1, 0)
+# (29, 1, 1, 32, 0, 0)
+# (45, 1, 1, 30, 0, 1)
+# (63, 1, 1, 58, 1, 1)
+# (36, 1, 0, 52, 1, 1)
+# (23, 0, 1, 40, 0, 1)
+# (50, 1, 1, 28, 0, 1)]
+
+# In the program data points are called rows
+
+# In the program categorical or numerical attributes are called cols for columns
+
+# The last column are the classes and will be called as classes in the program
+
+
+class Tree():
+ """
+ @todo: docstring for Tree
+ """
+ def __init__(self, tree_vec_of_tuples):
+ """@todo: Docstring for init method.
+
+        /tree_vec_of_tuples/ @todo
+
+ """
+ self.tree_vec_of_tuples = tree_vec_of_tuples
+ self.leaf_nodes = np.where(tree_vec_of_tuples[:,1] == -1)
+ self.classes = [(1, -1), (1, -1)]
+
+ def drop(self, y):
+ """
+ @todo: Docstring for drop
+ """
+ return y * 100
+
+ def leaf(self, y):
+ """
+        @todo: Docstring for leaf
+ """
+ return y
+
+
+
+
+
+def impurity(array) -> int:
+ """
+ Assumes the argument array is a one dimensional vector of zeroes and ones.
+ Computes the gini index impurity based on the relative frequency of ones in
+ the vector.
+
+ Example:
+
+ >>> array=np.array([1,0,1,1,1,0,0,1,1,0,1])
+ >>> array
+ array([1,0,1,1,1,0,0,1,1,0,1])
+
+ >>> impurity(array)
+ 0.23140495867768596
+ """
+ # Total labels
+ n_labels = len(array)
+ if n_labels == 0:
+ # Prevents division by zero, when the potential split does not have any rows
+ n_labels = 1
+ # Number of tuples labeled 1
+ n_labels_1 = array.sum()
+ # Calculate the relative frequency of ones with respect to the total labels
+ rel_freq_1 = n_labels_1 / n_labels
+ # Use the symmetry around the median property to also calculate the
+ # relative frequency of zeroes
+ rel_freq_0 = 1 - rel_freq_1
+ # Multiply the frequencies to get the gini index
+ gini_index = rel_freq_1 * rel_freq_0
+ return gini_index
+
+
+def bestsplit(x, y) -> int:
+ """
+ x = vector of single col
+ y = vector of classes (last col in x)
+
+ Consider splits of type "x <= c" where "c" is the average of two consecutive
+ values of x in the sorted order.
+
+ x and y must be of the same length
+
+ y[i] must be the class label of the i-th observation, and x[i] is the
+    corresponding value of attribute x
+
+ Example (best split on income):
+
+ >>> bestsplit(credit_data[:,3],credit_data[:,5])
+ 36
+ """
+ x_sorted = np.sort(np.unique(x))
+ if len(x_sorted) <= 2:
+ # Allows splitting on categorical (0 or 1) cols
+ split_points = [0.5]
+ else:
+ # Take average between consecutive numerical rows in the x col
+ split_points = (x_sorted[:len(x_sorted) - 1] + x_sorted[1:]) / 2
+
+ # De toepassing van bestsplit verdeelt de x col vector in tweeen, twee
+    # arrays van "x rows". Deze moeten we terug krijgen om in de child nodes
+ # bestsplit toe te passen.
+ #
+ # Deze lus berekent de best split value, en op basis daarvan weten we welke
+ # twee "x rows" arrays we moeten returnen, en welke split value het beste
+ # was natuurlijk.
+ best_delta_i = None
+ for split in split_points:
+ # np.index_exp maakt een boolean vector die zegt welke elementen in de
+ # col van x hoger of lager zijn dan split
+ col_slice_boolean_matrices = {
+ "left": np.index_exp[x > split],
+ "right": np.index_exp[x <= split]
+ }
+
+ # delta_i formule met de boolean vector van hierboven
+ delta_i = impurity(
+ y) - (len(y[col_slice_boolean_matrices["left"]]) *
+ impurity(y[col_slice_boolean_matrices["left"]]) +
+ len(y[col_slice_boolean_matrices["right"]]) *
+ impurity(y[col_slice_boolean_matrices["right"]])) / len(y)
+
+ print(f"{split=}, {delta_i=}")
+ #
+ if best_delta_i is not None:
+ if delta_i > best_delta_i:
+ best_delta_i, best_split, best_col_slice_boolean_matrices = delta_i, split, col_slice_boolean_matrices
+ else:
+ best_delta_i, best_split, best_col_slice_boolean_matrices = delta_i, split, col_slice_boolean_matrices
+ return best_delta_i, best_split, best_col_slice_boolean_matrices
+
+def tree_example(x=None, tr=None, **defaults) -> None:
+ """
+ @todo: Docstring for tree_example
+ """
+ tree_vec = []
+ print(tree_vec)
+ print(type(tree_vec))
+ tree_vec.append((36,3))
+ print(tree_vec)
+ tree_vec.append((0,3))
+ print(tree_vec)
+ tree_vec.append((1, -1))
+ print(tree_vec)
+ print(tree_vec[0])
+ print(type(tree_vec[0]))
+ tree_vec = np.array(tree_vec) # , dtype=(int, 2))
+ print(tree_vec)
+ print(type(tree_vec[0]))
+
+ tree = Tree(tree_vec)
+
+ # Let's show how to predict
+ # 1. maak een vector met root node voor elke row in x waarvoor je een class
+ # wil predicten.
+ y = np.ones(len(x), dtype=int)
+ print(y)
+ print(type(y))
+ print(y.shape)
+ # 2. Herinner recurrence relatie van whatsapp, en pas hem toe met
+ # tree.drop(x) op nodes die geen leaf node zijn
+ #
+ # Returns indices where not leaf node
+ print(tree.leaf_nodes)
+ # print(tree.classes)
+ # leafs = np.searchsorted(
+ # print(leafs)
+ y = np.where(np.searchsorted(tree.leaf_nodes, y))
+
+ # y = y[:,0]
+ # print(y)
+
+
+
+#
+#
+# Put all helper functions above this comment!
+
+
+def tree_grow(x=None,
+ y=None,
+ n_min=None,
+ min_leaf=None,
+ n_feat=None,
+ **defaults) -> Tree:
+ """
+ @todo: Docstring for tree_grow
+ """
+ # Voor de lus die onze tree growt instantieren we een list die tuples als
+ # elementen zal hebben, het grote voordeel is dat we op deze manier vector
+ # operaties meerdere parallele
+ # prediction drops kunnen doen. (Je kan bijvoorbeeld geen object methodes
+ # broadcasten als je een numpy array van node objecten hebt)
+ #
+ # De tuple moet uiteindelijk de informatie bevatten om voor een row in x
+ # een class te voorspellen. Hier hebben we voor nodig:
+ #
+ # 1. Het split value, voor lager of hoger test
+ # 2. Het col nummer waar de split bij hoort, anders weten we niet waar we op testen
+ #
+ # (split, col)
+ #
+ # De enige uitzondering hierop zijn leaf nodes. Om de tree data structure
+ # een numpy array te maken moeten dit ook tuples zijn. Dit lossen we op
+ # door een negatieve col aan te duiden. Dit zorgt ervoor dat de prediction
+ # functie hier eindigt.
+ #
+ # (class, negative_int: -1)
+ #
+ # Checkout tree example function for more info
+ tree_vec = []
+    # De nodelist heeft in het begin alleen alle rows van x, omdat alle rows
+ # altijd in de root in acht worden genomen.
+ #
+ # Dit representeren we met een boolean vector, met lengte het aantal rows in x en elementen True.
+ rows = np.full((1,len(x)), True)
+ nodelist = [rows]
+
+ # tree_array = np.empty
+ # while nodelist:
+ # current_node = nodelist.pop()
+ # slices = current_node.value
+ # node_classes = y[slices]
+ # # print(node_classes)
+
+ # # f'Current node will be leaf node if (( (number of data "tuples" in child node) < {n_min=} )) \n'
+ # # put stopping rules here before making a split
+ # if len(node_classes) < n_min:
+ # current_node.value = Leaf(
+ # np.argmax(np.bincount(node_classes.astype(int))))
+    #         print(f"leaf node has majority class:\n{current_node.value.value=}")
+ # continue
+
+ # if impurity(node_classes) > 0:
+ # # print(
+ # # f"Exhaustive split search says, new node will check these rows for potential spliterinos:\n{x[slices]}"
+ # # )
+
+ # # If we arrive here ever we are splitting
+ # # bestsplit(col, node_labels) ->
+ # # {"slices": list[int], "split": numpyfloat, "best_delta_i": numpyfloat}
+
+ # # slices (list) used for knowing which rows (int) to consider in a node
+ # # best_split saved in current_node.value
+ # # best_delta_i used to find best split among x_columns
+ # best_dict = None
+ # for i, x_col in enumerate(x[slices].transpose()):
+ # print(
+ # "\nExhaustive split search says; \"Entering new column\":")
+ # col_split_dict = bestsplit(x_col, node_classes, slices)
+
+ # if best_dict is not None:
+ # if col_split_dict["delta_i"] > best_dict["delta_i"]:
+ # best_dict = col_split_dict
+ # best_dict["col"] = i
+ # else:
+ # best_dict = col_split_dict
+ # best_dict["col"] = i
+ # print("\nThe best split for current node:", best_dict)
+
+ # # Here we store the splitted data into Node objects
+ # current_node.value = best_dict["split"]
+ # current_node.col = best_dict["col"]
+ # # Split will not happen if (( (number of data "tuples" potential split) < {min_leaf=} ))\n'
+ # if min([len(x) for x in best_dict["slices"].values()]) < min_leaf:
+ # continue
+ # else:
+ # # Invert left and right because we want left to pop() first
+ # children = [
+ # Node(value=best_dict["slices"]["right"]),
+ # Node(value=best_dict["slices"]["left"])
+ # ]
+ # current_node.add_split(children[1], children[0])
+ # nodelist += children
+ # else:
+ # current_node.value = Leaf(
+ # np.argmax(np.bincount(node_classes.astype(int))))
+ # print(
+    #             f"\n\nLEAF NODE has majority class:\n{current_node.value.value=}"
+ # )
+ # continue
+ # return tree
+
+
+def predict(x, nodes) -> list:
+ """
+ @todo: Docstring for predict
+ """
+ # which row to drop
+ # print(x)
+ drop = 0
+ while not set(nodes).issubset({0, 1}):
+ print(nodes)
+ # print(x[drop])
+ if isinstance(nodes[drop].value, Leaf):
+ nodes[drop] = nodes[drop].value.value
+ drop += 1
+ continue
+
+ print(nodes[drop].value)
+ print(nodes[drop].col)
+ # print(nodes[drop].col)
+ if x[drop, nodes[drop].col] > nodes[drop].value:
+ nodes[drop] = nodes[drop].left
+ else:
+ nodes[drop] = nodes[drop].right
+ return np.array(nodes)
+
+
+def tree_pred(x=None, tr=None, **defaults) -> np.array:
+ """
+ @todo: Docstring for tree_pred
+ """
+ nodes = [tr.tree] * len(x)
+ # y = np.linspace(0, len(x), 0)
+ # y = np.array(ele)
+ y = predict(x, nodes)
+ print(f"\n\nPredicted classes for {x=}\n\n are: {y=}")
+ return y
+
+
+if __name__ == '__main__':
+ #### IMPURITY TEST
+ # array=np.array([1,0,1,1,1,0,0,1,1,0,1])
+ # print(impurity(array))
+ # Should give 0.23....
+
+ #### BESTSPLIT TEST
+ # print(bestsplit(credit_data[:, 3], credit_data[:, 5]))
+ # Should give 36
+
+ #### TREE_GROW TEST
+ tree_grow_defaults = {
+ 'x': credit_data[:, :5],
+ 'y': credit_data[:, 5],
+ 'n_min': 2,
+ 'min_leaf': 1,
+ 'n_feat': 5
+ }
+
+ # Calling the tree grow, unpacking default as argument
+ # tree_grow(**tree_grow_defaults)
+
+ #### TREE_PRED TEST
+ tree_pred_defaults = {
+ 'x': credit_data[:, :5],
+ # 'tr': tree_grow(**tree_grow_defaults)
+ }
+
+ tree_example(**tree_pred_defaults)
+ # tree_pred(**tree_pred_defaults)
+
+# start_time = time.time()
+# print("--- %s seconds ---" % (time.time() - start_time))
diff --git a/assignment1.py b/assignment1.py
index 64b865d..e0fb29b 100644
--- a/assignment1.py
+++ b/assignment1.py
@@ -17,40 +17,63 @@ credit_data = np.genfromtxt('./credit_score.txt',
# (23, 0, 1, 40, 0, 1)
# (50, 1, 1, 28, 0, 1)]
+# In the program data points are called rows
-class Node():
+# In the program categorical or numerical attributes are called cols for columns
+
+# The last column are the classes and will be called as classes in the program
+
+
+class Node:
"""
- @todo: docstring for Node
+ The node object points to two other Node objects.
"""
- def __init__(self, value=None):
- """@todo: Docstring for init method.
+ def __init__(self, split_value_or_rows=None, col=None):
+ """Initialises the column and split value for the node.
+
+ /split_value_or_rows=None/ can either be the best split value of
+ a col, or a boolean mask for x that selects the rows to consider for
+ calculating the split_value
- /value=None/ @todo
+ /col=None/ if the node object has a split_value, then it also has a col
+ that belongs to this value
"""
- self.value = value
+ self.split_value_or_rows = split_value_or_rows
+ self.col = col
def add_split(self, left, right):
"""
- @todo: Docstring for add_split
+ Method that is called in the main loop of tree_grow.
+
+ Lets the node object point to two other objects that can be either Leaf
+ or Node.
"""
self.left = left
self.right = right
-class Leaf:
- def __init__(self, value: int):
- self.value = value
+# class Leaf:
+# """
+# Simple class that contains only the majority class in the leaf node.
+# """
+# def __init__(self, maj_class):
+# """Initialises the majority vote.
+
+# /maj_class/ @todo
+
+# """
-class Tree():
+class Tree:
"""
- @todo: docstring for Tree
+ Tree object that points towards the root node.
"""
def __init__(self, root_node_obj):
- """@todo: Docstring for init method.
+ """Initialises only by pointing to a Node object.
- /root_node_obj/ @todo
+ /root_node_obj/ is a node object that is made before entering the main
+ loop of tree grow.
"""
self.tree = root_node_obj
@@ -105,10 +128,10 @@ def impurity(array) -> int:
return gini_index
-def bestsplit(x, y, slices) -> int:
+def bestsplit(x, y) -> int:
"""
- x = vector of num values
- y = vector of class labels ... array([{x: x is 0 or 1}]) ??
+ x = vector of single col
+ y = vector of classes (last col in x)
Consider splits of type "x <= c" where "c" is the average of two consecutive
values of x in the sorted order.
@@ -125,58 +148,43 @@ def bestsplit(x, y, slices) -> int:
"""
x_sorted = np.sort(np.unique(x))
if len(x_sorted) <= 2:
- # Allows for normal cat classes slicing
+ # Allows splitting on categorical (0 or 1) cols
split_points = [0.5]
else:
+ # Take average between consecutive numerical rows in the x col
split_points = (x_sorted[:len(x_sorted) - 1] + x_sorted[1:]) / 2
- best_dict = None
+ # De toepassing van bestsplit verdeelt de x col vector in tweeen, twee
+    # arrays van "x rows". Deze moeten we terug krijgen om in de child nodes
+ # bestsplit toe te passen.
+ #
+ # Deze lus berekent de best split value, en op basis daarvan weten we welke
+ # twee "x rows" arrays we moeten returnen, en welke split value het beste
+ # was natuurlijk.
+ best_delta_i = None
for split in split_points:
- x_slices = {
- # "left": [row for row in range(len(x)) if x[row] > split],
- # "right": [row for row in range(len(x)) if x[row] <= split]
+ # np.index_exp maakt een boolean vector die zegt welke elementen in de
+ # col van x hoger of lager zijn dan split
+ col_slice_boolean_matrices = {
"left": np.index_exp[x > split],
"right": np.index_exp[x <= split]
}
- # delta_i formule
- delta_i = impurity(y) - (len(y[x_slices["left"]]) * impurity(
- y[x_slices["left"]]) + len(y[x_slices["right"]]) *
- impurity(y[x_slices["right"]])) / len(y)
-
- # this part is pretty bad
- if isinstance(slices, dict):
- x_slices = {
- "left": slices["left"][x_slices["left"]],
- "right": slices["right"][x_slices["right"]]
- # "left": np.index_exp[x > split],
- # "right": np.index_exp[x <= split]
- }
- else:
- x_slices = {
- "left": slices[x_slices["left"]],
- "right": slices[x_slices["right"]]
- # "left": np.index_exp[x > split],
- # "right": np.index_exp[x <= split]
- }
+ # delta_i formule met de boolean vector van hierboven
+ delta_i = impurity(
+ y) - (len(y[col_slice_boolean_matrices["left"]]) *
+ impurity(y[col_slice_boolean_matrices["left"]]) +
+ len(y[col_slice_boolean_matrices["right"]]) *
+ impurity(y[col_slice_boolean_matrices["right"]])) / len(y)
print(f"{split=}, {delta_i=}")
- # slices = bool_array_2_row_number(x_slices, slices)
- if best_dict is not None:
- if delta_i > best_dict["delta_i"]:
- best_dict = {
- # Make slices work regardless of np array dimensions with this list comprehension
- "slices": x_slices,
- "split": split,
- "delta_i": delta_i
- }
+ #
+ if best_delta_i is not None:
+ if delta_i > best_delta_i:
+ best_delta_i, best_split, best_col_slice_boolean_matrices = delta_i, split, col_slice_boolean_matrices
else:
- best_dict = {
- "slices": x_slices,
- "split": split,
- "delta_i": delta_i
- }
- return best_dict
+ best_delta_i, best_split, best_col_slice_boolean_matrices = delta_i, split, col_slice_boolean_matrices
+ return best_delta_i, best_split, best_col_slice_boolean_matrices
#
@@ -193,72 +201,84 @@ def tree_grow(x=None,
"""
@todo: Docstring for tree_grow
"""
- # store slice as variable
- slices = np.array([row for row in range(len(y))])
- # Initiate the nodelist with tuples of slice and class labels
- nodelist = [Node(value=slices)]
- tree = Tree(nodelist[0])
- while nodelist:
- current_node = nodelist.pop()
- slices = current_node.value
- node_classes = y[slices]
- # print(node_classes)
-
- # f'Current node will be leaf node if (( (number of data "tuples" in child node) < {n_min=} )) \n'
- # put stopping rules here before making a split
- if len(node_classes) < n_min:
- current_node.value = Leaf(
- np.argmax(np.bincount(node_classes.astype(int))))
- print(f"leaf node has majority clas:\n{current_node.value.value=}")
- continue
+ # De nodelist heeft in het begin alleen een lijst met alle rows van x,
+ # omdat alle rows in de root in acht worden genomen voor bestsplit berekening.
+ #
+ # Dit representeren we met een boolean vector, met lengte het aantal rows
+ # in x en elementen True. Deze boolean vector zullen we repeatedly gebruiken als een
+ # mask over x om de rows voor bestsplit op te halen.
+ rows = np.full((1,len(x)), True)
+
+ # Het eerste node object moet nu geinstantieerd worden
+ root = Node(split_value_or_rows=rows)
+ nodelist = [rows]
- if impurity(node_classes) > 0:
- # print(
- # f"Exhaustive split search says, new node will check these rows for potential spliterinos:\n{x[slices]}"
- # )
-
- # If we arrive here ever we are splitting
- # bestsplit(col, node_labels) ->
- # {"slices": list[int], "split": numpyfloat, "best_delta_i": numpyfloat}
-
- # slices (list) used for knowing which rows (int) to consider in a node
- # best_split saved in current_node.value
- # best_delta_i used to find best split among x_columns
- best_dict = None
- for i, x_col in enumerate(x[slices].transpose()):
- print(
- "\nExhaustive split search says; \"Entering new column\":")
- col_split_dict = bestsplit(x_col, node_classes, slices)
-
- if best_dict is not None:
- if col_split_dict["delta_i"] > best_dict["delta_i"]:
- best_dict = col_split_dict
- best_dict["col"] = i
- else:
- best_dict = col_split_dict
- best_dict["col"] = i
- print("\nThe best split for current node:", best_dict)
-
- # Here we store the splitted data into Node objects
- current_node.value = best_dict["split"]
- current_node.col = best_dict["col"]
- # Split will not happen if (( (number of data "tuples" potential split) < {min_leaf=} ))\n'
- if min([len(x) for x in best_dict["slices"].values()]) < min_leaf:
- continue
- else:
- # Invert left and right because we want left to pop() first
- children = [
- Node(value=best_dict["slices"]["right"]),
- Node(value=best_dict["slices"]["left"])
- ]
- current_node.add_split(children[1], children[0])
- nodelist += children
- else:
- current_node.value = Leaf(
- np.argmax(np.bincount(node_classes.astype(int))))
- print(f"\n\nLEAF NODE has majority clas:\n{current_node.value.value=}")
- continue
- return tree
+ # Initiate the nodelist with tuples of slice and class labels
+ # nodelist = [Node(value=slices)]
+ # tree = Tree(nodelist[0])
+ # while nodelist:
+ # current_node = nodelist.pop()
+ # slices = current_node.value
+ # node_classes = y[slices]
+ # # print(node_classes)
+
+ # # f'Current node will be leaf node if (( (number of data "tuples" in child node) < {n_min=} )) \n'
+ # # put stopping rules here before making a split
+ # if len(node_classes) < n_min:
+ # current_node.value = Leaf(
+ # np.argmax(np.bincount(node_classes.astype(int))))
+    #         print(f"leaf node has majority class:\n{current_node.value.value=}")
+ # continue
+
+ # if impurity(node_classes) > 0:
+ # # print(
+ # # f"Exhaustive split search says, new node will check these rows for potential spliterinos:\n{x[slices]}"
+ # # )
+
+ # # If we arrive here ever we are splitting
+ # # bestsplit(col, node_labels) ->
+ # # {"slices": list[int], "split": numpyfloat, "best_delta_i": numpyfloat}
+
+ # # slices (list) used for knowing which rows (int) to consider in a node
+ # # best_split saved in current_node.value
+ # # best_delta_i used to find best split among x_columns
+ # best_dict = None
+ # for i, x_col in enumerate(x[slices].transpose()):
+ # print(
+ # "\nExhaustive split search says; \"Entering new column\":")
+ # col_split_dict = bestsplit(x_col, node_classes, slices)
+
+ # if best_dict is not None:
+ # if col_split_dict["delta_i"] > best_dict["delta_i"]:
+ # best_dict = col_split_dict
+ # best_dict["col"] = i
+ # else:
+ # best_dict = col_split_dict
+ # best_dict["col"] = i
+ # print("\nThe best split for current node:", best_dict)
+
+ # # Here we store the splitted data into Node objects
+ # current_node.value = best_dict["split"]
+ # current_node.col = best_dict["col"]
+ # # Split will not happen if (( (number of data "tuples" potential split) < {min_leaf=} ))\n'
+ # if min([len(x) for x in best_dict["slices"].values()]) < min_leaf:
+ # continue
+ # else:
+ # # Invert left and right because we want left to pop() first
+ # children = [
+ # Node(value=best_dict["slices"]["right"]),
+ # Node(value=best_dict["slices"]["left"])
+ # ]
+ # current_node.add_split(children[1], children[0])
+ # nodelist += children
+ # else:
+ # current_node.value = Leaf(
+ # np.argmax(np.bincount(node_classes.astype(int))))
+ # print(
+    #             f"\n\nLEAF NODE has majority class:\n{current_node.value.value=}"
+ # )
+ # continue
+ # return tree
def predict(x, nodes) -> list:
@@ -268,7 +288,7 @@ def predict(x, nodes) -> list:
# which row to drop
# print(x)
drop = 0
- while not set(nodes).issubset({0,1}):
+ while not set(nodes).issubset({0, 1}):
print(nodes)
# print(x[drop])
if isinstance(nodes[drop].value, Leaf):
@@ -318,7 +338,7 @@ if __name__ == '__main__':
}
# Calling the tree grow, unpacking default as argument
- # tree_grow(**tree_grow_defaults)
+ tree_grow(**tree_grow_defaults)
#### TREE_PRED TEST
tree_pred_defaults = {
@@ -326,7 +346,7 @@ if __name__ == '__main__':
'tr': tree_grow(**tree_grow_defaults)
}
- tree_pred(**tree_pred_defaults)
+ # tree_pred(**tree_pred_defaults)
-start_time = time.time()
-print("--- %s seconds ---" % (time.time() - start_time))
+# start_time = time.time()
+# print("--- %s seconds ---" % (time.time() - start_time))