139 lines
4.6 KiB
Python
139 lines
4.6 KiB
Python
import logging
|
|
import random
|
|
# Telegram
|
|
from telegram.ext import Updater, CommandHandler, MessageHandler, Filters, CallbackQueryHandler
|
|
from telegram import InlineKeyboardButton, InlineKeyboardMarkup, ChatAction, ParseMode
|
|
# Machine learning
|
|
from pandas import DataFrame
|
|
import numpy
|
|
from sklearn import preprocessing
|
|
from sklearn.feature_extraction.text import CountVectorizer
|
|
from sklearn.naive_bayes import MultinomialNB
|
|
from . import config
|
|
|
|
# Module-level logger, named after this module.
LOGGER = logging.getLogger(__name__)

# Shared bag-of-words vectorizer and naive Bayes classifier. Both are
# (re)fitted in retrain_data() and queried in handle_message().
count_vectorizer = CountVectorizer()
classifier = MultinomialNB()
|
|
|
|
# Class labels. They are used as training targets, compared against the
# classifier's prediction, and sent as callback data on the inline buttons.
WAARHEID = 'w'
LEUGEN = 'l'

# (file name, class label) pairs that make up the training set.
SOURCES = [
    ("slecht.tkst", LEUGEN),
    ("goed.tkst", WAARHEID),
    ("slecht-gebruiker.tkst", LEUGEN),
    ("goed-gebruiker.tkst", WAARHEID),
]

# Candidate replies sent when a message is classified as LEUGEN.
LEUGEN_ANTWOORDEN = [
    "Zeg makker...",
    "FOUTMELDING_BERICHT_BEVAT_LEUGEN",
    "Ik hoopte dat je inmiddels wijzer was.",
    "Ik ben niet boos, ik ben slechts teleurgesteld",
    "Zeg makkeroni...",
    "Voor jou heb ik nog een leuke anagram: je bent een flikkerende flakker.",
    "Zge mkaker",
    "Tijd voor een handhaving",
    "Jullie fantasieën kunnen ook nooit uitgedoofd worden. Jullie frikkende frikken!",
]

# Selection weights, parallel to LEUGEN_ANTWOORDEN: the first reply weighs 25,
# every other reply weighs 1.
LEUGEN_ANTWOORDEN_GEWICHT = [25] + [1] * (len(LEUGEN_ANTWOORDEN) - 1)

# Current training set; replaced by retrain_data().
data_frame = DataFrame({"tekst": [], "klasse": []})
|
|
|
|
def load_file(file_name):
    """Yield the lines of ``file_name`` one by one.

    Lines are yielded exactly as read, including their trailing newline.
    The file is opened with the platform default encoding.

    The ``with`` block closes the file even when the consumer stops
    iterating early or an exception is raised while reading.

    Args:
        file_name: Path of the text file to read.

    Yields:
        Each line of the file as a string.
    """
    with open(file_name) as handle:
        yield from handle
|
|
|
|
def build_frame(file_name, classification):
    """Build a labelled DataFrame from the lines of one training file.

    Args:
        file_name: Path of the file to read; also used as the prefix of
            every row label.
        classification: Class label stored in the "klasse" column of
            every row.

    Returns:
        A DataFrame with one row per line of the file. Each row holds the
        line in "tekst" and ``classification`` in "klasse", and is indexed
        as "<file_name>:<zero-based line number>".
    """
    lines = list(load_file(file_name))
    records = [{"tekst": line, "klasse": classification} for line in lines]
    labels = [file_name + ":" + str(number) for number, _ in enumerate(lines)]
    return DataFrame(records, index=labels)
|
|
|
|
def retrain_data():
    """Reload every training file and refit the vectorizer and classifier.

    Replaces the module-level ``data_frame`` with the rows of all files
    listed in ``SOURCES``, then fits ``count_vectorizer`` and ``classifier``
    on that data.
    """
    global data_frame
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every call; a single concat is the supported replacement.
    from pandas import concat

    LOGGER.info("Loading dataset")
    frames = [
        build_frame(file_name, classification)
        for file_name, classification in SOURCES
    ]
    # Empty files yield column-less frames; leave them out of the concat.
    frames = [frame for frame in frames if not frame.empty]
    if frames:
        data_frame = concat(frames)
    else:
        # Keep the expected columns when there is no data at all.
        data_frame = DataFrame({"tekst": [], "klasse": []})

    LOGGER.info("Transforming dataset")
    counts = count_vectorizer.fit_transform(data_frame['tekst'].values)

    LOGGER.info("Training model")
    targets = data_frame['klasse'].values
    classifier.fit(counts, targets)
|
|
|
|
def gen_leugen_message(message, isEdit):
    """Pick one reply from LEUGEN_ANTWOORDEN at random.

    The draw is weighted by LEUGEN_ANTWOORDEN_GEWICHT.

    Args:
        message: Text of the offending message (currently unused).
        isEdit: Whether the message was an edit (currently unused).

    Returns:
        The chosen reply string.
    """
    (reply,) = random.choices(
        LEUGEN_ANTWOORDEN,
        weights=LEUGEN_ANTWOORDEN_GEWICHT,
        k=1,
    )
    return reply
|
|
|
|
def dump_dataset(update, context):
    """Send the current training DataFrame to the chat as a Markdown code block.

    Args:
        update: Incoming Telegram update; the reply goes to its chat.
        context: Handler context providing the bot instance.
    """
    fence = "```"
    rendered = "".join((fence, str(data_frame), fence))
    context.bot.send_message(
        chat_id=update.message.chat_id,
        text=rendered,
        parse_mode=ParseMode.MARKDOWN,
    )
|
|
|
|
def handle_message(update, context):
    """Classify an incoming message and reply when it is labelled LEUGEN.

    A WAARHEID prediction is only logged. A LEUGEN prediction gets a quoted
    reply with a random accusation and two inline buttons. "Goed" carries
    LEUGEN as callback data and "Fout" carries WAARHEID.

    Args:
        update: Incoming Telegram update.
        context: Handler context providing the bot instance.
    """
    mesg = update.effective_message
    # Updates without text (photos, stickers, ...) cannot be vectorized.
    if mesg is None or not mesg.text:
        return

    is_edit = mesg.edit_date is not None

    message_counts = count_vectorizer.transform([mesg.text])
    result = classifier.predict(message_counts)[0]

    if result == WAARHEID:
        LOGGER.info("'%s' bevat de waarheid", mesg.text)
    elif result == LEUGEN:
        context.bot.send_chat_action(chat_id=mesg.chat_id, action=ChatAction.TYPING)
        keyboard = [[
            InlineKeyboardButton("Goed", callback_data=LEUGEN),
            InlineKeyboardButton("Fout", callback_data=WAARHEID),
        ]]
        reply_markup = InlineKeyboardMarkup(keyboard)
        mesg.reply_text(
            gen_leugen_message(mesg.text, is_edit),
            quote=True,
            reply_markup=reply_markup,
        )
|
|
|
|
def handle_correction(update, context):
    """Handle a press on the "Goed"/"Fout" buttons under a bot reply.

    Presses by users not listed in ``config.BEHEERDERS`` are acknowledged
    and nothing is stored. For administrators:

    * WAARHEID ("Fout"): the text of the message the bot replied to is
      appended to "goed-gebruiker.tkst" and the bot's reply is deleted.
    * LEUGEN ("Goed"): the buttons are removed from the bot's reply.

    The model is retrained after an administrator action.

    Args:
        update: Incoming Telegram update carrying the callback query.
        context: Handler context providing the bot instance.
    """
    callback = update.callback_query
    LOGGER.info("{} drukte op {}".format(callback.from_user.id, callback.data))

    if callback.from_user.id not in config.BEHEERDERS:
        if callback.data == WAARHEID:
            callback.answer(text="Bedankt voor de verbetering!")
        else:
            callback.answer(text="Bedankt voor de bevestiging!")
        return

    if callback.data == WAARHEID:
        # The original message may have been deleted, or may carry no text;
        # in that case there is nothing to add to the dataset.
        original = callback.message.reply_to_message
        text = original.text if original is not None else None
        if text:
            with open("goed-gebruiker.tkst", "a") as my_file:
                my_file.write(text + "\n")
        else:
            LOGGER.warning("Oorspronkelijk bericht ontbreekt; verbetering niet opgeslagen")
        callback.answer(text="Bedankt voor de verbetering, beheerder!")
        callback.message.delete()
    elif callback.data == LEUGEN:
        callback.answer(text="Bedankt voor de bevestiging, beheerder!")
        # Replace the inline keyboard with an empty one.
        context.bot.edit_message_reply_markup(
            chat_id=callback.message.chat_id,
            message_id=callback.message.message_id,
            reply_markup=InlineKeyboardMarkup([[]]),
        )

    retrain_data()
|
|
|
|
def rapporteer(update, context):
    """Add a phrase to the LEUGEN training data on an administrator's request.

    The phrase is the text of the message being replied to, or otherwise
    the command arguments joined by spaces. It is appended to
    "slecht-gebruiker.tkst", confirmed to the user, and the model is
    retrained. Users not listed in ``config.BEHEERDERS`` are refused.

    Args:
        update: Incoming Telegram update carrying the command message.
        context: Handler context; ``context.args`` holds the command
            arguments.
    """
    message = update.message
    if message.from_user.id not in config.BEHEERDERS:
        message.reply_text("Sorry, jij bent geen beheerder. Deze actie wordt niet uitgevoerd.")
        return

    if message.reply_to_message is not None:
        text = message.reply_to_message.text
    else:
        # The command arguments live on the context; a bare ``args`` name
        # is not defined in this function.
        text = " ".join(context.args or [])

    # Nothing to store: the replied-to message has no text, or the command
    # was given without arguments.
    if not text:
        message.reply_text("Geen tekst gevonden om te rapporteren.")
        return

    with open("slecht-gebruiker.tkst", "a") as my_file:
        my_file.write(text + "\n")
    message.reply_text("'{}' is toegevoegd aan de lijst met verboden uitdrukkingen!".format(text))
    retrain_data()
|
|
|
|
def init():
    """Initialize the machine learning data by calling retrain_data()."""
    retrain_data()
|
|
|