#!/usr/bin/python
# -*- coding: utf-8 -*-

"""Utilities for parsing readings into usable data."""
import pandas, collections, numpy, re, math

def enps(scores_column):
    """Calculate eNPS given pandas Series with scores.

    Args:
        scores_column: pandas Series of scores. For string/object columns
            every character other than a digit or "." is removed before the
            values are converted to float.

    Returns:
        Tuple ``(score, promoters, passives, detractors)`` where the last
        three are rounded percentages of the column and
        ``score = promoters - detractors``.
    """
    # Strip non-digit characters. The pattern is a raw string so that "\d"
    # is handed to the regex engine verbatim instead of being an invalid
    # string escape (a SyntaxWarning on recent Python versions).
    if scores_column.dtype == "str" or scores_column.dtype == "object":
        scores_column = scores_column.str.replace(r'[^\d\.]', '', regex=True)
    scores_column = scores_column.astype(float)

    # in_range() is lower-exclusive / upper-inclusive, so the buckets are
    # (-1, 6], (6, 8] and (8, 10].
    detractors = round(in_range(scores_column, -1, 6) * 100)
    passives = round(in_range(scores_column, 6, 8) * 100)
    promoters = round(in_range(scores_column, 8, 10) * 100)

    # HACK Rounding error compensation so they all add to 100... kinda:
    # the surplus/deficit is absorbed by passives, clamped to [0, 100].
    rounding_error = promoters + detractors + passives - 100
    passives = max(0, min(passives - rounding_error, 100))

    # eNPS formula
    score = promoters - detractors

    return score, promoters, passives, detractors

def in_range(scores_column, lower, upper):
    """Return the fraction of entries that fall in the interval (lower, upper].

    Entries that cannot be parsed as numbers are coerced to NaN: they never
    count as in range, but they are still part of the denominator.

    Args:
        scores_column: pandas Series of scores (numeric or numeric strings).
        lower: Exclusive lower bound.
        upper: Inclusive upper bound.

    Returns:
        Fraction between 0 and 1, or 0.0 when the input is empty.
    """
    # TODO Maybe move this and re.findall to separate function
    # Cast whole column into float
    numbers = pandas.to_numeric(scores_column, downcast="float", errors="coerce")

    # An empty column would otherwise divide by zero and produce NaN.
    total = len(numbers)
    if total == 0:
        return 0.0

    # Get number of values in range
    matching = numbers[(numbers > lower) & (numbers <= upper)].count()

    return matching / total

# TODO Optimize
def _count_aspects(columns):
    """Count how often each non-empty ";"-separated aspect appears in any cell."""
    return collections.Counter(
        item
        for answer in columns.to_numpy().flatten()
        for item in answer.split(";")
        if item
    )

def touchpoints(good_columns, bad_columns):
    """Compile per-aspect good/bad percentages from multi-choice answers.

    Each answer contains the aspects chosen by a participant split by ";".
    Occurrences of every aspect are counted across all cells of the good
    columns and, separately, across all cells of the bad columns.

    Args:
        good_columns: pandas DataFrame/Series of ";"-separated string answers.
        bad_columns: pandas DataFrame/Series of ";"-separated string answers.

    Returns:
        List of dicts with keys ``question``, ``good_pct`` and ``bad_pct``,
        one per aspect that appears in BOTH the good and the bad columns,
        sorted by ``good_pct + bad_pct`` in descending order.
    """
    good = _count_aspects(good_columns)
    bad = _count_aspects(bad_columns)

    # Both percentages are relative to the number of cells in good_columns.
    total = good_columns.to_numpy().size

    # Compile into percentages for each aspect present on both sides,
    # keeping the order in which aspects were first seen in good_columns.
    result = [
        dict(
            question=question,
            good_pct=round((no_good / total) * 100),
            bad_pct=round((bad[question] / total) * 100),
        )
        for question, no_good in good.items()
        if question in bad
    ]

    # Sort by sum
    result.sort(key=lambda t: t["good_pct"] + t["bad_pct"], reverse=True)

    return result

# HACK Horrifying
# TODO Optimize
def classify(dataframe, good_regex, bad_regex, sort_by_good=False):
    """Classify every answer of every question as good, neither or bad.

    Args:
        dataframe: pandas DataFrame with one column per question and string
            answers as cells.
        good_regex: Pattern; answers matching it (from the start of the
            string) are tallied as "good".
        bad_regex: Pattern; answers matching it (and not ``good_regex``) are
            tallied as "bad". Everything else is tallied as "neither".
        sort_by_good: When true, return an OrderedDict sorted by the "good"
            percentage in descending order.

    Returns:
        Dict keyed by question; each value is a dict with the rounded
        percentages under the keys "good", "neither" and "bad".
    """
    good = re.compile(good_regex)
    bad = re.compile(bad_regex)
    questions = {}

    # Sort each answer into good, neither or bad and tally them up
    for question in dataframe.columns:
        answers = dataframe[question]
        counts = {"good": 0, "neither": 0, "bad": 0}

        for answer in answers:
            if good.match(answer):
                counts["good"] += 1
            elif bad.match(answer):
                # A bad_regex match belongs in "bad", not "neither".
                counts["bad"] += 1
            else:
                counts["neither"] += 1

        # Convert the integer tallies to rounded percentages in one step so
        # no floating point error accumulates per answer.
        total = len(answers)
        questions[question] = {
            label: round(count / total * 100) if total else 0
            for label, count in counts.items()
        }

    # Sort questions by highest "good" pct.
    if sort_by_good:
        questions = collections.OrderedDict(
            sorted(questions.items(), key=lambda kv: kv[1]["good"], reverse=True)
        )

    return questions

def class_average(dataframe, regex):
    """Return the rounded percentage of cells whose text matches ``regex``.

    Args:
        dataframe: pandas DataFrame whose cells are strings.
        regex: Pattern applied with ``re.match`` (anchored at the start of
            each cell).

    Returns:
        Integer percentage clamped to [0, 100]; 0 for an empty dataframe.
    """
    pattern = re.compile(regex)

    # An empty frame has nothing to average; avoid dividing by zero.
    total = dataframe.size
    if total == 0:
        return 0

    matches = sum(
        1
        for _, column in dataframe.items()
        for cell in column
        if pattern.match(cell)
    )

    return min(100, max(0, round((matches / total) * 100)))

# HACK Horrifying
def tally(series):
    """Return the percentage share of each distinct non-blank value.

    Empty strings are treated as missing and dropped together with other
    missing values; the remaining values are counted, normalised to
    percentages and rounded to one decimal place.
    """
    answered = series.replace("", numpy.nan).dropna()
    shares = answered.value_counts(normalize=True)

    return (shares * 100).round(1)