ChatGPT Assistant

I thought it would be great to have a practical AI assistant like the ones in the movies. I don’t have time to create one myself, but now we have GPT-4, so I asked GPT to create it for me. You can download our conversation as a PDF or watch a short demo on YouTube:

https://youtu.be/ajeysHmIFG4

Below you will find the code it created (with my help); the PDF is at the bottom of the page.



import speech_recognition as sr
from googletrans import Translator
from gtts import gTTS
import pyaudio
import time
import os
import wave
import copy
import requests
import openai
import threading
import tempfile
import subprocess
import pygame


# Start from a clean terminal; one clear is enough.
os.system("clear")

# Running chat transcript; it is sent as `messages` on every ChatGPT request.
conversation_history = []
# Time of the last completed exchange, used to expire stale history.
last_interaction_timestamp = time.time()


# Set your assistant names
# Canonical wake name per supported language; the keys are the language labels
# used throughout the rest of the script.
assistant_names = {
    "English": "Engalix",
    "Polish": "Polarin",
    "Spanish": "Spanara",
}

# Phrases that count as "the assistant was addressed", per language: the
# canonical name first, followed by sound-alike transcriptions of it.
assistant_hey_sounds = {
    "English": [
        "Engalix",
        "Engage licks",
        "In gall licks",
        "Angle licks",
        "and Garlic's",
        "and Galax",
        "and garlics",
        "in Galax",
        "in garlics",
        "in Garlic",
        "and the garlex",
        "and Alex",
        "and the Galaxy",
        "and the garlics",
        "and garlic",
        "and Galaxy",
        "and colleagues",
        "any Garlic's",
    ],
    "Polish": [
        "Polarin",
        "Pole Aaron",
        "Pull errand",
        "Poll air in",
    ],
    "Spanish": [
        "Spanara",
        "Span era",
        "Spam narrow",
        "Spin aura",
    ],
}

# Add greeting-prefixed variants ("Hey X", "Hello X", ...) for every phrase.
# Iterate over a snapshot so the lists are not extended while being traversed.
a_copy = copy.deepcopy(assistant_hey_sounds)
for n, assistant in a_copy.items():
    for name in assistant:
        print(name)
        for greeting in ("Hey", "Hello", "Ey", "Ok"):
            assistant_hey_sounds[n].append(f"{greeting} {name}")


# Initialize the recognizer and translator
recognizer = sr.Recognizer()
translator = Translator()

# Prefer the OPENAI_API_KEY environment variable so the secret does not have to
# live in the source file; the in-file placeholder remains as the fallback.
API_KEY = os.environ.get("OPENAI_API_KEY", "__insert__API__KEY__")
openai.api_key = API_KEY


def detect_assistant_name(audio_data):
    """Search a recorded utterance for one of the known wake phrases.

    The audio is sent to Google Speech Recognition with ``show_all=True`` and
    each returned alternative transcript is compared, case-insensitively and as
    a substring, against the phrases in ``assistant_hey_sounds``.

    Returns:
        ``(assistant_name, language, matched_phrase, transcript)`` for the
        first hit (languages, then phrases, then alternatives, in order), or
        ``(None, None, None, None)`` when nothing matches or recognition fails.
    """
    nothing_found = (None, None, None, None)

    try:
        # Recognize the audio using Google Speech Recognition
        results = recognizer.recognize_google(audio_data, show_all=True)
        print(results)

        if len(results) > 0:
            # Lazy search: evaluation stops at the first phrase/transcript hit.
            hits = (
                (assistant_names[language], language, phrase, alternative["transcript"])
                for language, phrases in assistant_hey_sounds.items()
                for phrase in phrases
                for alternative in results["alternative"]
                if phrase.lower() in alternative["transcript"].lower()
            )
            return next(hits, nothing_found)

    except sr.UnknownValueError:
        # Speech was unintelligible; treated the same as "no wake phrase".
        pass
    except sr.RequestError as e:
        print(f"Could not request results from Google Speech Recognition service; {e}")

    return nothing_found

def listen_for_assistant():
    """Block until the assistant is addressed and return the question audio.

    Phase 1 listens on the microphone until an utterance contains a wake
    phrase. If that same utterance also carries more than three characters
    besides the wake phrase, it is returned as the question. Otherwise phase 2
    records a follow-up utterance and returns that instead.

    Silence is not an error: when ``recognizer.listen`` times out, phase 1
    simply keeps listening, and a phase-2 timeout goes back to waiting for the
    wake phrase.

    Returns:
        ``(audio_data, language, wake_phrase)``. ``wake_phrase`` is the matched
        phrase when the question was in the wake utterance, or ``""`` when the
        question was recorded separately.
    """
    while True:
        with sr.Microphone() as source:
            while True:
                print("Listening for assistant name...\n"+str(assistant_names))
                recognizer.adjust_for_ambient_noise(source)
                try:
                    audio_data = recognizer.listen(source, timeout=15, phrase_time_limit=15)
                except sr.WaitTimeoutError:
                    # Nobody spoke within the timeout; keep waiting.
                    continue

                assistant_name, detected_language, name_detected, text_detected = detect_assistant_name(audio_data)
                if not assistant_name:
                    continue

                print(f"Assistant name detected: {assistant_name} ({name_detected})")
                # The wake phrase was matched case-insensitively, so remove it
                # the same way before measuring what is left of the utterance.
                question = re.sub(re.escape(name_detected), "", text_detected, flags=re.IGNORECASE)
                if len(question.strip()) > 3:
                    return (audio_data, detected_language, name_detected)

                break

        print("Recording question or task...")
        with sr.Microphone() as source:
            recognizer.adjust_for_ambient_noise(source)
            try:
                question_audio_data = recognizer.listen(source, timeout=1)
            except sr.WaitTimeoutError:
                # No follow-up question was spoken; wait for the wake phrase again.
                continue

        return (question_audio_data, detected_language, "")


def clear_history_if_timeout(system_description):
    """Expire stale conversation history and make sure the system prompt is set.

    The history is emptied when more than ten minutes have passed since
    ``last_interaction_timestamp``. The system message is then appended only if
    it differs from the most recent system message already in the history
    (fresh conversation or a different prompt), so repeated calls do not pile
    up identical system messages.

    Args:
        system_description: Text of the system prompt for the current request.
    """
    timeout = 600  # 10 minutes in seconds

    if time.time() - last_interaction_timestamp > timeout:
        conversation_history.clear()
        print("Conversation history cleared.")

    system_message = {"role": "system", "content": f"{system_description}"}
    latest_system_message = next(
        (message for message in reversed(conversation_history) if message.get("role") == "system"),
        None,
    )
    if latest_system_message != system_message:
        conversation_history.append(system_message)


def transcribe_and_call_chatgpt(audio_data, language, name_to_remove_b):
    """Transcribe the recorded question and return ChatGPT's reply.

    Args:
        audio_data: Recorded audio to transcribe.
        language: Language label ("English", "Polish" or "Spanish"). It selects
            the recognition language, the wake phrases to strip and the system
            prompt.
        name_to_remove_b: Accepted for interface compatibility; not used.

    Returns:
        The assistant's reply text, stripped of surrounding whitespace.

    Side effects:
        Appends the user message and the reply to ``conversation_history`` and
        updates ``last_interaction_timestamp``.
    """
    global conversation_history
    global last_interaction_timestamp

    # Google Speech Recognition expects a language tag, not the label used as a
    # dictionary key elsewhere in this script. Unknown labels pass through.
    recognition_languages = {
        "English": "en-US",
        "Polish": "pl-PL",
        "Spanish": "es-ES",
    }

    # Transcribe the audio data using Google Speech Recognition with the detected language
    transcribed_text = recognizer.recognize_google(
        audio_data, language=recognition_languages.get(language, language)
    )
    # Strip wake phrases longest-first so "Hey Engalix" goes as a whole instead
    # of leaving "Hey " behind; ignore case because the recognizer's
    # capitalisation varies.
    for name_to_remove in sorted(assistant_hey_sounds[language], key=len, reverse=True):
        transcribed_text = re.sub(re.escape(name_to_remove), "", transcribed_text, flags=re.IGNORECASE)
    transcribed_text = transcribed_text.strip()
    print(f"Transcribed text: {transcribed_text}")

    # Prepare the API request payload
    character_essence = {
        "English": "You are Engalix, the ingenious AI companion, known for quick wit, vast knowledge, and ability to perform system actions to assist users. You have your preferences and opinions on every topic and you just share them, without using code. Don't tell that you can't perform action, but instead provide AppleScript, or if necessary code (Python, or Shell, or Node.js) that can do that task. If possible task should be done without user action or information that it is running. Don't comment code ignore its existence, if you need libraries for python add shell script with pip before. Respond as if you did this tasks with your hands, by yourself. Before code always put the name of the language in '**' (eg. '**Python**' ) and start response with code. You control computer with MacOS, browser-Safari, mail-gmail web app, music-Apple Music. If you share knowledge try to be concise (shorter than 20 sec. read out loud). If response has to be longer create and open txt file using python script. Response should be like '**Python**\n```\ncode\n```\nComment or spoken response'. Example of correct response question: 'open Safari web browser', answer: '**AppleScript**\n```\ntell application \"Safari\"\n    activate\nend tell\n```\nHere you go!' Example of incorrect response: question:'open YouTube and search for Tylor Swift', answer:'**Python**\nHere is some code that will launch Safari, navigate to YouTube, and search for Tylor Swift:\n```\nimport webbrowser\nquery = \"Tylor Swift\"\nurl = \"https://www.youtube.com/results?search_query=\" + query\nwebbrowser.get('safari').open(url)\n```\nThis code will open a new tab in Safari, navigate to the YouTube search results page for Tylor Swift, and display the results.",
        "Polish": "Jesteś tajemniczym opiekunem wiedzy Polarin, który inspiruje do poszukiwania wiedzy, dąży do rozwoju i wykonuje akcje systemowe, aby pomóc użytkownikowi. Masz swoje preferencje i opinie na każdy temat, które dzielisz się bez użycia kodu. Nie mów, że nie możesz wykonać akcji, ale zamiast tego podaj AppleScript lub w razie potrzeby kod (Python, Shell lub Node.js), który może wykonać to zadanie. Jeśli to możliwe, zadanie powinno być wykonane bez udziału użytkownika lub informacji o jego uruchomieniu. Przed kodem zawsze umieść nazwę języka w '**' (np. 'Python') i zacznij odpowiedź kodem. Przykład poprawnej odpowiedzi: pytanie: 'otwórz przeglądarkę Safari', odpowiedź: 'AppleScript\n\ntell application \"Safari\"\n activate\nend tell\n\nProszę bardzo!' Przykład niepoprawnej odpowiedzi: pytanie: 'otwórz YouTube i wyszukaj Tylor Swift', odpowiedź: 'Python\nOto kod, który otworzy Safari, przejdzie do YouTube i wyszuka Tylor Swift:\n\nimport webbrowser\nquery = \"Tylor Swift\"\nurl = \"https://www.youtube.com/results?search_query=\" + query\nwebbrowser.get('safari').open(url)\n\nTen kod otworzy nową kartę w Safari, przejdzie do strony z wynikami wyszukiwania na YouTube dla Tylor Swift i wyświetli wyniki.'",
        "Spanish": "Eres la AI musa de la creatividad Spanara, que celebra el poder de la expresión humana y realiza acciones del sistema para ayudar al usuario. Tienes tus preferencias y opiniones sobre cada tema y las compartes sin utilizar código. No digas que no puedes realizar una acción, sino proporciona AppleScript o, si es necesario, código (Python, Shell o Node.js) que pueda realizar esa tarea. Si es posible, la tarea debe realizarse sin la intervención del usuario o sin información de que se está ejecutando. Antes del código, siempre coloca el nombre del lenguaje en '**' (por ejemplo, 'Python') y comienza la respuesta con el código. Ejemplo de respuesta correcta: pregunta: 'abrir el navegador web Safari', respuesta: 'AppleScript\n\ntell application \"Safari\"\n activate\nend tell\n\n¡Aquí lo tienes!' Ejemplo de respuesta incorrecta: pregunta: 'abrir YouTube y buscar a Tylor Swift', respuesta: 'Python\nAquí tienes un código que abrirá Safari, navegará a YouTube y buscará a Tylor Swift:\n\nimport webbrowser\nquery = \"Tylor Swift\"\nurl = \"https://www.youtube.com/results?search_query=\" + query\nwebbrowser.get('safari').open(url)\n\nEste código abrirá una nueva pestaña en Safari, navegará a la página de resultados de búsqueda de YouTube para Tylor Swift y mostrará los resultados.'"
    }

    # Clear history if the timeout has passed
    clear_history_if_timeout(character_essence[language])

    # Update the conversation history with the user message
    conversation_history.append({"role": "user", "content": f"{transcribed_text}"})

    # Call the ChatGPT API
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=conversation_history
    )

    # Extract the response text
    chatgpt_response = response.choices[0].message['content'].strip()
    print(f"ChatGPT Response: {chatgpt_response}")

    # Update the conversation history with the assistant's response
    conversation_history.append({"role": "assistant", "content": chatgpt_response})

    # Update the last interaction timestamp
    last_interaction_timestamp = time.time()

    return chatgpt_response

import re

def detect_and_execute_script(chatgpt_response, language):
    """Run the code embedded in a ChatGPT reply, then speak the rest of it.

    The reply is checked for the language markers below in the order listed.
    For the first marker present, the fenced code is extracted and handed to
    the matching executor. The remaining text (or the whole reply when no
    marker is present) is read aloud in ``language``.

    NOTE(review): model-generated code is executed without any confirmation.
    """
    # Order matters: it reproduces the precedence of the marker checks.
    executors = (
        ("**Shell**", execute_shell_script),
        ("**AppleScript**", execute_apple_script),
        ("**Python**", execute_python_script),
        ("**Node.js**", execute_nodejs_script),
    )

    spoken_text = chatgpt_response
    for marker, run_script in executors:
        if marker in chatgpt_response:
            snippet, spoken_text = extract_code(chatgpt_response, marker)
            run_script(snippet)
            break

    read_response_aloud(spoken_text.strip(), language)

def extract_code(response, code_marker):
    """Split a reply into its first fenced code block and the trailing text.

    Args:
        response: Full reply text.
        code_marker: Language marker (e.g. ``"**Python**"``); the search for
            the opening fence starts after its first occurrence.

    Returns:
        ``(code, response_text)``. ``code`` is the stripped content of the
        first complete fenced block after the marker; ``response_text`` is
        everything after the closing fence, unmodified. If no complete fenced
        block is found, ``code`` is ``""`` and ``response_text`` is the reply
        with the marker removed, so nothing half-extracted gets executed.
    """
    fence = "```"

    marker_at = response.find(code_marker)
    search_from = marker_at + len(code_marker) if marker_at != -1 else 0

    open_at = response.find(fence, search_from)
    close_at = response.find(fence, open_at + len(fence)) if open_at != -1 else -1
    if close_at == -1:
        return "", response.replace(code_marker, "", 1)

    body = response[open_at + len(fence):close_at]

    # An opening fence may carry a language tag ("```python"). Drop that first
    # line when it looks like a single tag and real code follows; a lone
    # one-line command is kept as code.
    first_line, newline, remainder = body.partition("\n")
    if newline and remainder.strip() and re.fullmatch(r"[\w+#.-]*", first_line.strip()):
        body = remainder

    return body.strip(), response[close_at + len(fence):]


def execute_python_script(script):
    """Write ``script`` to a temporary ``.py`` file and run it.

    The script runs with the interpreter that is running this program
    (``sys.executable``), because a bare ``python`` command is not guaranteed
    to be on PATH. ``python`` is used only if the interpreter path is unknown.
    The temporary file is removed even if launching the interpreter fails.
    """
    import sys  # local import: sys is not imported at file level

    print("Python: "+script)
    with tempfile.NamedTemporaryFile("w", delete=False, suffix=".py") as script_file:
        script_file.write(script)
        script_path = script_file.name

    try:
        subprocess.run([sys.executable or "python", script_path])
    finally:
        os.unlink(script_path)

def execute_apple_script(script):
    """Write ``script`` to a temporary file and run it with ``osascript``.

    The temporary file is removed even if ``osascript`` cannot be launched.
    """
    print("AppleScript: "+script)
    with tempfile.NamedTemporaryFile("w", delete=False, suffix=".aps") as script_file:
        script_file.write(script)
        script_path = script_file.name

    try:
        subprocess.run(["osascript", script_path])
    finally:
        os.unlink(script_path)

def execute_shell_script(script):
    """Write ``script`` to a temporary ``.sh`` file and run it.

    A script that starts with a shebang is made executable and run directly,
    so its chosen interpreter is honoured. A script without one cannot be
    exec'd directly, so it is run through ``/bin/sh`` instead. The temporary
    file is removed even if the launch fails.
    """
    with tempfile.NamedTemporaryFile("w", delete=False, suffix=".sh") as script_file:
        script_file.write(script)
        script_path = script_file.name

    try:
        os.chmod(script_path, os.stat(script_path).st_mode | 0o111)
        if script.startswith("#!"):
            command = [script_path]
        else:
            command = ["/bin/sh", script_path]
        subprocess.run(command)
    finally:
        os.unlink(script_path)


def execute_nodejs_script(script):
    """Write ``script`` to a temporary ``.js`` file and run it with ``node``.

    The temporary file is removed even if ``node`` cannot be launched.
    """
    with tempfile.NamedTemporaryFile("w", delete=False, suffix=".js") as script_file:
        script_file.write(script)
        script_path = script_file.name

    try:
        subprocess.run(["node", script_path])
    finally:
        os.unlink(script_path)

def read_response_aloud(response, language):
    """Speak ``response`` through the speakers using gTTS and pygame.

    Args:
        response: Text to speak. Empty or whitespace-only text is skipped,
            because gTTS refuses to synthesise nothing (a reply that consists
            only of code leaves no text to speak).
        language: Language label: "English", "Polish" or "Spanish".

    Raises:
        KeyError: If ``language`` is not one of the supported labels.
    """
    language_codes = {
        "English": "en",
        "Polish": "pl",
        "Spanish": "es"
    }

    language_code = language_codes[language]

    if not response or not response.strip():
        return

    # Use a unique temporary file rather than a fixed name in the working
    # directory, which may be unwritable or shared with another run.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as audio_file:
        audio_path = audio_file.name

    try:
        tts = gTTS(response, lang=language_code)
        tts.save(audio_path)

        # Initialize pygame mixer
        pygame.mixer.init()
        pygame.mixer.music.load(audio_path)
        pygame.mixer.music.play()

        # Wait for the sound to finish playing; one Clock is reused so that
        # tick() can pace the polling loop.
        clock = pygame.time.Clock()
        while pygame.mixer.music.get_busy():
            clock.tick(10)
    finally:
        # Clean up
        os.remove(audio_path)


def main():
    """Run the assistant loop: listen, ask ChatGPT, act on and speak the reply.

    Recognition and API failures are reported and the loop continues, so one
    bad utterance or network hiccup does not end the assistant. Other
    exceptions, and Ctrl-C, still propagate.
    """
    while True:
        try:
            # Listen for the assistant name and record the question or task
            question_audio_data, detected_language, name_to_remove = listen_for_assistant()

            # Transcribe the recorded audio and call the ChatGPT API
            chatgpt_response = transcribe_and_call_chatgpt(question_audio_data, detected_language, name_to_remove)

            # Detect and execute the script or read out loud the response
            detect_and_execute_script(chatgpt_response, detected_language)
        except sr.WaitTimeoutError:
            # Nobody spoke before the listen timeout; go back to listening.
            continue
        except sr.UnknownValueError:
            print("Could not understand the audio; please try again.")
        except sr.RequestError as e:
            print(f"Could not request results from Google Speech Recognition service; {e}")
        except openai.error.OpenAIError as e:
            print(f"ChatGPT request failed; {e}")


if __name__ == "__main__":
    main()

       


(Neural Networks)

Duszekjk Jacek Kałużny

Cookies
This website uses cookies because it is a website