"""Minimal CART-style decision-tree classifier: question & counting helpers."""
# import streamlit as st
import pandas as pd  # unused in this section; kept — it may be used elsewhere in the notebook


def unique_values(rows, col):
    """Return the set of distinct values found in column `col` of `rows`."""
    return {row[col] for row in rows}


def class_counts(rows):
    """Count occurrences of each class label.

    Convention throughout this module: the label is the LAST element of
    every row.
    """
    counts = {}
    for row in rows:
        label = row[-1]
        counts[label] = counts.get(label, 0) + 1
    return counts


def is_numeric(value):
    """True for int/float values (numeric columns split with >=, others with ==)."""
    return isinstance(value, (int, float))


class Question:
    """A test used to partition a dataset.

    Stores a column index and a reference value; `match` compares an
    example's value in that column against it — `>=` for numeric values,
    `==` for everything else.
    """

    def __init__(self, column, value):
        self.column = column
        self.value = value

    def match(self, example):
        """Return True when `example` satisfies this question."""
        val = example[self.column]
        if is_numeric(val):
            return val >= self.value
        return val == self.value

    def __repr__(self):
        condition = ">=" if is_numeric(self.value) else "=="
        # BUG FIX: the original read a module-level `header` that is never
        # defined in this file, so repr() raised NameError. Fall back to the
        # bare column index when no `header` global exists.
        try:
            name = header[self.column]
        except NameError:
            name = f"column {self.column}"
        return f'Is {name} {condition} {str(self.value)} ?'
def partition(rows, question):
    """Split `rows` into (true_rows, false_rows) by `question.match`."""
    true_rows, false_rows = [], []
    for row in rows:
        if question.match(row):
            true_rows.append(row)
        else:
            false_rows.append(row)
    return true_rows, false_rows


def gini(rows):
    """Gini impurity of the label distribution in `rows` (0.0 == pure node)."""
    counts = class_counts(rows)
    impurity = 1
    n = float(len(rows))  # hoisted loop invariant
    for lbl in counts:
        prob_of_lbl = counts[lbl] / n
        impurity -= prob_of_lbl ** 2
    return impurity


def info_gain(left, right, current_uncertainty):
    """Information gain: parent impurity minus the weighted child impurities."""
    p = float(len(left)) / (len(left) + len(right))
    return current_uncertainty - p * gini(left) - (1 - p) * gini(right)


def find_best_split(rows):
    """Exhaustively try every (column, value) question; return the best.

    Returns (best_gain, best_question). (0, None) when `rows` is empty or
    when no split improves on the current impurity.
    """
    best_gain = 0
    best_question = None
    # ROBUSTNESS FIX: the original indexed rows[0] unconditionally and
    # raised IndexError on an empty dataset.
    if not rows:
        return best_gain, best_question
    current_uncertainty = gini(rows)
    n_features = len(rows[0]) - 1  # last column is the label, not a feature

    for col in range(n_features):
        for val in {row[col] for row in rows}:
            question = Question(col, val)
            true_rows, false_rows = partition(rows, question)
            if not true_rows or not false_rows:
                continue  # the question must actually separate the data
            gain = info_gain(true_rows, false_rows, current_uncertainty)
            # >= keeps the LAST of tied questions — original behavior, preserved.
            if gain >= best_gain:
                best_gain, best_question = gain, question
    return best_gain, best_question


class Leaf:
    """Terminal node: holds the class counts of the rows that reached it."""
    def __init__(self, rows):
        self.predictions = class_counts(rows)


class Decision_Node:
    """Internal node: a question plus the two subtrees it splits into."""
    def __init__(self, question, true_branch, false_branch):
        self.question = question
        self.true_branch = true_branch
        self.false_branch = false_branch


def build_tree(rows):
    """Recursively grow the tree; stop with a Leaf when no split gains anything."""
    gain, question = find_best_split(rows)
    if gain == 0:
        return Leaf(rows)
    true_rows, false_rows = partition(rows, question)
    return Decision_Node(question,
                         build_tree(true_rows),
                         build_tree(false_rows))


def print_tree(node, spacing=''):
    """Pretty-print the tree, growing `spacing` at each level."""
    if isinstance(node, Leaf):
        print(spacing + 'Predict', node.predictions)
        return
    print(spacing + str(node.question))
    print(spacing + '--> True:')
    print_tree(node.true_branch, spacing + ' ')
    print(spacing + '--> False:')
    print_tree(node.false_branch, spacing + ' ')


def classify(row, node):
    """Walk the tree for `row`; return the reached leaf's class-count dict."""
    if isinstance(node, Leaf):
        return node.predictions
    branch = node.true_branch if node.question.match(row) else node.false_branch
    return classify(row, branch)


def print_leaf(counts):
    """Convert raw leaf counts into a {label: 'NN%'} dict (truncated ints)."""
    total = sum(counts.values()) * 1.0
    probs = {}
    for lbl in counts.keys():
        probs[lbl] = str(int(counts[lbl] / total * 100)) + '%'
    return probs


def predict(data, header):
    """Build a tree from `data` and classify every training row.

    NOTE(review): results are keyed by each row's ACTUAL label, so rows
    sharing a label overwrite one another — only the last prediction per
    label survives. Original behavior, preserved for compatibility.
    `header` is accepted for interface compatibility but unused here
    (column names are only read by Question.__repr__ via a module global);
    the original's no-op `header = header` assignment was removed.
    """
    tree = build_tree(data)
    results = {}
    for row in data:
        results[row[-1]] = print_leaf(classify(row, tree))
    return results, tree

# Example driver (left disabled in the original):
# results, tree = predict(data, header)
# print(results)
# print_tree(tree)