# In [ ]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# processing.py

# University of Zurich
# Department of Computational Linguistics

# Authors: Jenifer Leleany Meyer & Florian Heinz
# Matriculation Numbers: 19-919-695 & 19-111-889


import time
from typing import List, Tuple
import re
from random import randint
import csv

# NOTE(review): bare reference to the builtin `dir`; the expression is
# evaluated and its value discarded, so this line has no effect.
dir
class Joke:
    """
    A single joke (raw text plus an optional score) with helpers to split it
    into sentences, tokenize it, censor profanities and display it.

    Attributes:
        raw_joke: the joke as handed to the constructor; either one string or
            a sequence of string fragments.
        score: optional rating of the joke; any value float() accepts.
            Comparisons between jokes are based on this value only.

    Because __eq__ is defined without __hash__, instances are unhashable.
    """

    # Characters that survive cleaning in addition to letters and digits.
    _ALLOWED_PUNCTUATION = ".,'\"?!° ()-…’“”"
    # A line break is a real newline / carriage return or the two-character
    # escape text "\n" / "\r" (backslash followed by a letter).
    _LINE_BREAK = re.compile(r"(?:\\n|\\r|\r|\n)+")
    # Whitespace that follows a sentence terminator (optionally followed by a
    # closing quote or parenthesis). The look-ahead keeps a spaced-out
    # ellipsis ". . ." in one piece.
    _SENTENCE_END = re.compile(r'(?:(?<=[.?!…])|(?<=[.?!…]["”)]))\s+(?![\s.])')
    # A word (with inner apostrophes / hyphens), a run of dots, or any other
    # single non-space character.
    _TOKEN = re.compile(r"\w+(?:['’-]\w+)*|\.{2,}|\S")
    # Tokens that attach to the previous / to the following token.
    _NO_SPACE_BEFORE = frozenset(",.!?)”°'’…")
    _NO_SPACE_AFTER = frozenset("(“")
    _SCREEN_WIDTH = 80

    def __init__(self, raw_joke, score=None) -> None:
        """
        Store the raw joke and, optionally, its score.

        Args:
            raw_joke: a string, or a sequence of string fragments.
            score: optional rating used by the comparison operators.
        """
        self.raw_joke = raw_joke
        self.score = score

    def _raw_text(self) -> str:
        """
        Return the raw joke as one string.

        Fragments of a sequence are re-joined with commas: main() in this file
        builds them from csv fields, i.e. from text that was cut at its commas.
        """
        raw = self.raw_joke
        if raw is None:
            return ""
        if isinstance(raw, str):
            return raw
        return ",".join(str(part) for part in raw if part is not None)

    def split_into_sentences(self) -> List[str]:
        """
        Split the joke into cleaned sentences.

        The text is first cut at line breaks, then every character that is
        neither a letter, a digit nor in _ALLOWED_PUNCTUATION is dropped, and
        finally each piece is cut after ".", "?", "!" or "…" when whitespace
        follows. Abbreviations such as "Dr. Who" are therefore split as well.

        Returns:
            The non-empty sentences in their original order.
        """
        allowed = self._ALLOWED_PUNCTUATION
        sentences = []
        # Line breaks are handled before cleaning, because cleaning removes
        # the backslash of an escaped "\n" and would leave a stray "n" behind.
        for segment in self._LINE_BREAK.split(self._raw_text()):
            cleaned = "".join(
                char for char in segment
                if char.isalpha() or char.isdigit() or char in allowed
            )
            for sentence in self._SENTENCE_END.split(cleaned.strip()):
                if sentence:
                    sentences.append(sentence)
        return sentences

    def _tokenize(self) -> List[List[str]]:
        """
        Tokenize every sentence of the joke.

        Returns:
            One list of tokens per sentence. Words keep inner apostrophes and
            hyphens ("don't", "well-known"), a run of dots is one token and
            every other punctuation character is a token of its own.
        """
        return [
            self._TOKEN.findall(sentence)
            for sentence in self.split_into_sentences()
        ]

    def filter_profanity(self, filename="profanities.txt") -> Tuple[List[List[str]], int]:
        """
        Replace every profane token by '#' characters of the same length.

        A token counts as profane when its lower-cased form contains one of
        the listed profanities; this also catches inflected forms, at the
        price of occasional false positives.

        Args:
            filename: utf-8 text file with one profanity per line; blank
                lines are ignored.

        Returns:
            A tuple (tokenized sentences after censoring, number of censored
            tokens).

        Raises:
            OSError: if the profanity file cannot be opened (for example
                FileNotFoundError).
        """
        with open(filename, "r", encoding="utf-8") as file:
            # Blank lines must be skipped: the empty string is contained in
            # every token and would censor the whole joke.
            profanities = {line.strip().lower() for line in file if line.strip()}

        censored = []
        num_profanities = 0
        for sentence in self._tokenize():
            clean_sentence = []
            for token in sentence:
                lowered = token.lower()
                if any(profanity in lowered for profanity in profanities):
                    num_profanities += 1
                    token = "#" * len(token)
                clean_sentence.append(token)
            censored.append(clean_sentence)
        return (censored, num_profanities)

    def tell_joke(self, delay=7) -> None:
        """
        Print the censored joke, pausing before the framed punchline.

        All sentences but the last one are printed as plain lines. The last
        sentence (the punchline) is printed inside a frame; if there was a
        build-up, the method waits `delay` seconds before printing it.

        Args:
            delay: seconds to wait before the punchline.

        Raises:
            OSError: if the default profanity file "profanities.txt" cannot
                be opened.
        """
        sentences, _ = self.filter_profanity()
        lines = self._render_sentences(sentences)
        if not lines:
            return
        build_up, punchline = lines[:-1], lines[-1]
        for line in build_up:
            print(line)
        if build_up and delay > 0:
            time.sleep(delay)
        print(self._frame(punchline))

    @staticmethod
    def pretty_print(joke) -> str:
        """
        Return a joke as human-readable text.

        Args:
            joke: a Joke instance, a raw joke string, a list of tokenized
                sentences, or a single list of tokens.

        Returns:
            The sentences with tokens glued back together, separated by one
            space.
        """
        if isinstance(joke, Joke):
            sentences = joke._tokenize()
        elif isinstance(joke, str):
            sentences = Joke(joke)._tokenize()
        else:
            sentences = list(joke)
            # A flat list of strings is one tokenized sentence.
            if sentences and all(isinstance(item, str) for item in sentences):
                sentences = [sentences]
        return " ".join(Joke._render_sentences(sentences))

    @staticmethod
    def _render_sentences(sentences) -> List[str]:
        """
        Turn tokenized sentences into strings, one per non-empty sentence.
        """
        rendered = []
        in_quote = False  # carried over: a quotation may span sentences
        for tokens in sentences:
            text, in_quote = Joke._join_tokens(tokens, in_quote)
            if text:
                rendered.append(text)
        return rendered

    @staticmethod
    def _join_tokens(tokens, in_quote=False):
        """
        Glue the tokens of one sentence together.

        Returns:
            A tuple (text, in_quote) where in_quote tells whether a straight
            double quote is still open at the end of the sentence.
        """
        parts = []
        glue_next = False
        for token in tokens:
            if token == '"':
                # Straight quotes alternate between opening and closing.
                closing = in_quote
                in_quote = not in_quote
            else:
                closing = token in Joke._NO_SPACE_BEFORE or set(token) == {"."}
            if parts and not glue_next and not closing:
                parts.append(" ")
            parts.append(token)
            glue_next = token in Joke._NO_SPACE_AFTER or (token == '"' and in_quote)
        return "".join(parts), in_quote

    @staticmethod
    def _frame(text, screen_width=_SCREEN_WIDTH) -> str:
        """
        Return `text` inside an ASCII box centred on a screen_width column.
        """
        text_width = len(text)
        box_width = text_width + 6
        # A negative margin (text wider than the screen) yields no padding.
        margin = " " * ((screen_width - box_width) // 2)
        border = margin + "+" + "-" * (box_width - 4) + "+"
        blank = margin + "| " + " " * text_width + " |"
        middle = margin + "| " + text + " |"
        return "\n".join(["", border, blank, middle, blank, border, ""])

    def __repr__(self):
        """
        Return an unambiguous representation of the joke.
        """
        return f"{type(self).__name__}({self.raw_joke!r}, score={self.score!r})"

    def _scores_with(self, other):
        """
        Return (own score, other score) as floats, or None when `other` is
        not a Joke or one of the scores is missing / not numeric.
        """
        if not isinstance(other, Joke):
            return None
        try:
            return float(self.score), float(other.score)
        except (TypeError, ValueError):
            return None

    def __eq__(self, other):
        """
        checks if the score is equal to another score

        Without two numeric scores NotImplemented is returned, so Python
        falls back to comparing identity.
        """
        pair = self._scores_with(other)
        return NotImplemented if pair is None else pair[0] == pair[1]

    def __lt__(self, other):
        """
        checks if the score is lower than the other
        """
        pair = self._scores_with(other)
        return NotImplemented if pair is None else pair[0] < pair[1]

    def __gt__(self, other):
        """
        checks if the score is greater than the other
        """
        pair = self._scores_with(other)
        return NotImplemented if pair is None else pair[0] > pair[1]

    def __le__(self, other):
        """
        checks if the score is lower than or equal to the other
        """
        pair = self._scores_with(other)
        return NotImplemented if pair is None else pair[0] <= pair[1]

    def __ge__(self, other):
        """
        checks if the score is greater than or equal to the other
        """
        pair = self._scores_with(other)
        return NotImplemented if pair is None else pair[0] >= pair[1]


class JokeGenerator:
    """
    Hold a collection of raw jokes and tell them through Joke objects.

    Attributes:
        joke_list: list of raw jokes; each element is passed unchanged to
            the Joke constructor.
    """

    def __init__(self, filename) -> None:
        """
        Store the raw jokes.

        Args:
            filename: iterable of raw jokes.
                NOTE(review): despite its name, no file is opened here;
                main() in this file passes the already parsed jokes.
        """
        # Copy into a list so that any iterable can be indexed later on.
        self.joke_list = list(filename)

    def make_jokes_objects(self) -> List:
        """
        Return one Joke object per stored raw joke.
        """
        return [Joke(one_joke) for one_joke in self.joke_list]

    def generate_jokes(self):
        """
        Tell every stored joke that consists of at least two sentences.
        """
        for joke in self.make_jokes_objects():
            if len(joke.split_into_sentences()) >= 2:
                joke.tell_joke()

    def random_joke(self):
        """
        Choose a random joke, tell it and return it.

        Returns:
            The Joke object that was told.

        Raises:
            ValueError: if there are no jokes to choose from.
        """
        if not self.joke_list:
            raise ValueError("cannot choose a random joke: the joke list is empty")
        index = randint(0, len(self.joke_list) - 1)
        chosen_joke = Joke(self.joke_list[index])
        chosen_joke.tell_joke()
        return chosen_joke



def main():
    """
    Read the jokes from "dadjokes_sample.csv" and tell a random one.

    The file is read with csv.DictReader and is expected to have the columns
    "joke", "score" and "date". A joke containing commas is spread over more
    fields than there are columns; DictReader stores the surplus fields under
    the key None. In that case the last two fields of the row are the score
    and the date, and everything before them belongs to the joke.
    """
    jokes = []
    # newline="" lets the csv module handle line endings itself.
    with open("dadjokes_sample.csv", "r", encoding="utf-8", newline="") as file:
        for row in csv.DictReader(file):
            fields = [row.get("joke"), row.get("score"), row.get("date")]
            fields.extend(row.get(None) or [])
            # Drop score and date; what remains are the joke fragments.
            jokes.append(fields[:-2])

    if not jokes:
        print("No jokes found in dadjokes_sample.csv.")
        return

    # random_joke() prints the joke itself, so its return value is not printed.
    JokeGenerator(jokes).random_joke()



# Run main() only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()