Duszekjk Jacek Kałużny

I thought that it would be great to have a practical AI assistant like the ones in the movies. I don’t have time to create it myself, but now we have GPT-4, so I asked GPT to create it for me. You can download our conversation as a PDF or watch a short demo on YouTube:

https://youtu.be/ajeysHmIFG4

Below you will find the code it created (with my help); the PDF is at the bottom of the page.

"""Voice-controlled ChatGPT assistant for macOS.

The script listens on the microphone for one of several wake words (one
assistant persona per language), records the spoken question, sends it to the
OpenAI chat API, executes any code block the model returns and reads the rest
of the answer out loud.

SECURITY NOTE: code returned by the model is executed on this machine without
any review or sandboxing. Only run this on a machine and account where that
is acceptable.
"""

import copy
import os
import re
import subprocess
import sys
import tempfile
import threading
import time
import wave

import openai
import pyaudio
import pygame
import requests
import speech_recognition as sr
from googletrans import Translator
from gtts import gTTS

os.system("clear")

# Chat messages exchanged so far, in the format expected by the chat API.
conversation_history = []
last_interaction_timestamp = time.time()

# Seconds of inactivity after which the conversation is forgotten.
HISTORY_TIMEOUT_SECONDS = 600

# Set your assistant names
assistant_names = {
    "English": "Engalix",
    "Polish": "Polarin",
    "Spanish": "Spanara",
}

# Language tags understood by Google Speech Recognition, per assistant language.
SPEECH_LANGUAGE_CODES = {
    "English": "en-US",
    "Polish": "pl-PL",
    "Spanish": "es-ES",
}

# Wake words plus the ways the (English) recognizer tends to mis-hear them.
assistant_hey_sounds = {
    "English": [
        "Engalix", "Engage licks", "In gall licks", "Angle licks",
        "and Garlic's", "and Galax", "and garlics", "in Galax", "in garlics",
        "in Garlic", "and the garlex", "and Alex", "and the Galaxy",
        "and the garlics", "and garlic", "and Galaxy", "and colleagues",
        "any Garlic's",
    ],
    "Polish": ["Polarin", "Pole Aaron", "Pull errand", "Poll air in"],
    # The comma after "Spanara" was missing, which silently fused the first
    # two entries into the single phrase "SpanaraSpan era".
    "Spanish": ["Spanara", "Span era", "Spam narrow", "Spin aura"],
}

# Also accept every wake word with a greeting in front of it.
_GREETINGS = ("Hey", "Hello", "Ey", "Ok")
for _sounds in assistant_hey_sounds.values():
    _sounds.extend(
        [f"{_greeting} {_sound}" for _sound in list(_sounds) for _greeting in _GREETINGS]
    )

# One case-insensitive pattern per language. Longer phrases come first so that
# "Hey Engalix" is consumed as a whole instead of leaving a stray "Hey".
_WAKE_PATTERNS = {
    _language: re.compile(
        "|".join(re.escape(_s) for _s in sorted(_phrases, key=len, reverse=True)),
        re.IGNORECASE,
    )
    for _language, _phrases in assistant_hey_sounds.items()
}

# Fenced code block with an optional language tag after the opening fence.
_CODE_FENCE_RE = re.compile(r"```(?:[\w.+#-]*\n)?(.*?)```", re.DOTALL)

# System prompts, one persona per language. The Polish and Spanish examples
# carry the same '**Language**' marker and code fences as the English one,
# because detect_and_execute_script() relies on exactly that format.
CHARACTER_ESSENCE = {
    "English": (
        "You are Engalix, the ingenious AI companion, known for quick wit, vast "
        "knowledge, and ability to perform system actions to assist users. "
        "You have your preferences and opinions on every topic and you just "
        "share them, without using code. "
        "Don't tell that you can't perform action, but instead provide "
        "AppleScript, or if necessary code (Python, or Shell, or Node.js) that "
        "can do that task. "
        "If possible task should be done without user action or information "
        "that it is running. "
        "Don't comment code ignore its existence, if you need libraries for "
        "python add shell script with pip before. "
        "Respond as if you did this tasks with your hands, by yourself. "
        "Before code always put the name of the language in '**' "
        "(eg. '**Python**' ) and start response with code. "
        "You control computer with MacOS, browser-Safari, mail-gmail web app, "
        "music-Apple Music. "
        "If you share knowledge try to be concise (shorter than 20 sec. read "
        "out loud). "
        "If response has to be longer create and open txt file using python "
        "script. "
        "Response should be like "
        "'**Python**\n```\ncode\n```\nComment or spoken response'. "
        "Example of correct response question: 'open Safari web browser', "
        "answer: '**AppleScript**\n```\ntell application \"Safari\"\n activate\n"
        "end tell\n```\nHere you go!' "
        "Example of incorrect response: question:'open YouTube and search for "
        "Tylor Swift', answer:'**Python**\nHere is some code that will launch "
        "Safari, navigate to YouTube, and search for Tylor Swift:\n```\n"
        "import webbrowser\nquery = \"Tylor Swift\"\n"
        "url = \"https://www.youtube.com/results?search_query=\" + query\n"
        "webbrowser.get('safari').open(url)\n```\n"
        "This code will open a new tab in Safari, navigate to the YouTube "
        "search results page for Tylor Swift, and display the results."
    ),
    "Polish": (
        "Jesteś tajemniczym opiekunem wiedzy Polarin, który inspiruje do "
        "poszukiwania wiedzy, dąży do rozwoju i wykonuje akcje systemowe, aby "
        "pomóc użytkownikowi. "
        "Masz swoje preferencje i opinie na każdy temat, które dzielisz się "
        "bez użycia kodu. "
        "Nie mów, że nie możesz wykonać akcji, ale zamiast tego podaj "
        "AppleScript lub w razie potrzeby kod (Python, Shell lub Node.js), "
        "który może wykonać to zadanie. "
        "Jeśli to możliwe, zadanie powinno być wykonane bez udziału "
        "użytkownika lub informacji o jego uruchomieniu. "
        "Przed kodem zawsze umieść nazwę języka w '**' (np. '**Python**') i "
        "zacznij odpowiedź kodem. "
        "Przykład poprawnej odpowiedzi: pytanie: 'otwórz przeglądarkę "
        "Safari', odpowiedź: '**AppleScript**\n```\n"
        "tell application \"Safari\"\n activate\nend tell\n```\n"
        "Proszę bardzo!' "
        "Przykład niepoprawnej odpowiedzi: pytanie: 'otwórz YouTube i "
        "wyszukaj Tylor Swift', odpowiedź: '**Python**\nOto kod, który "
        "otworzy Safari, przejdzie do YouTube i wyszuka Tylor Swift:\n```\n"
        "import webbrowser\nquery = \"Tylor Swift\"\n"
        "url = \"https://www.youtube.com/results?search_query=\" + query\n"
        "webbrowser.get('safari').open(url)\n```\n"
        "Ten kod otworzy nową kartę w Safari, przejdzie do strony z wynikami "
        "wyszukiwania na YouTube dla Tylor Swift i wyświetli wyniki.'"
    ),
    "Spanish": (
        "Eres la AI musa de la creatividad Spanara, que celebra el poder de "
        "la expresión humana y realiza acciones del sistema para ayudar al "
        "usuario. "
        "Tienes tus preferencias y opiniones sobre cada tema y las compartes "
        "sin utilizar código. "
        "No digas que no puedes realizar una acción, sino proporciona "
        "AppleScript o, si es necesario, código (Python, Shell o Node.js) que "
        "pueda realizar esa tarea. "
        "Si es posible, la tarea debe realizarse sin la intervención del "
        "usuario o sin información de que se está ejecutando. "
        "Antes del código, siempre coloca el nombre del lenguaje en '**' "
        "(por ejemplo, '**Python**') y comienza la respuesta con el código. "
        "Ejemplo de respuesta correcta: pregunta: 'abrir el navegador web "
        "Safari', respuesta: '**AppleScript**\n```\n"
        "tell application \"Safari\"\n activate\nend tell\n```\n"
        "¡Aquí lo tienes!' "
        "Ejemplo de respuesta incorrecta: pregunta: 'abrir YouTube y buscar "
        "a Tylor Swift', respuesta: '**Python**\nAquí tienes un código que "
        "abrirá Safari, navegará a YouTube y buscará a Tylor Swift:\n```\n"
        "import webbrowser\nquery = \"Tylor Swift\"\n"
        "url = \"https://www.youtube.com/results?search_query=\" + query\n"
        "webbrowser.get('safari').open(url)\n```\n"
        "Este código abrirá una nueva pestaña en Safari, navegará a la página "
        "de resultados de búsqueda de YouTube para Tylor Swift y mostrará los "
        "resultados.'"
    ),
}

# Initialize the recognizer and translator
recognizer = sr.Recognizer()
translator = Translator()

# Prefer the key from the environment; the placeholder keeps the old
# "edit the file" workflow working.
API_KEY = os.environ.get("OPENAI_API_KEY", "__insert__API__KEY__")
openai.api_key = API_KEY


def _strip_wake_phrases(text, language):
    """Return *text* without any wake phrase of *language*, whitespace-normalized."""
    without_names = _WAKE_PATTERNS[language].sub("", text)
    return " ".join(without_names.split())


def detect_assistant_name(audio_data):
    """Look for a wake word in *audio_data*.

    The audio is transcribed with Google Speech Recognition and every
    alternative transcript is searched for a wake phrase of any language.

    Returns:
        ``(assistant_name, language, matched_phrase, transcript)`` for the
        first match, where ``matched_phrase`` is spelled exactly as it occurs
        in ``transcript``; ``(None, None, None, None)`` when nothing matched
        or recognition failed.
    """
    try:
        # With show_all=True the library returns the raw result dict, or an
        # empty list when nothing was recognized.
        result = recognizer.recognize_google(audio_data, show_all=True)
        print(result)
    except sr.UnknownValueError:
        return None, None, None, None
    except sr.RequestError as e:
        print(f"Could not request results from Google Speech Recognition service; {e}")
        return None, None, None, None

    if not isinstance(result, dict):
        return None, None, None, None

    transcripts = [
        alternative.get("transcript", "")
        for alternative in result.get("alternative", [])
    ]
    for language, pattern in _WAKE_PATTERNS.items():
        for text in transcripts:
            match = pattern.search(text)
            if match:
                return assistant_names[language], language, match.group(0), text
    return None, None, None, None


def listen_for_assistant():
    """Block until a wake word is heard and return the audio of the question.

    If the utterance containing the wake word also contains more than three
    characters of other text, that same recording is returned as the question.
    Otherwise a second recording is made for the question itself.

    Returns:
        ``(audio_data, language, name_detected)``; ``name_detected`` is the
        empty string when the question was recorded separately.
    """
    with sr.Microphone() as source:
        while True:
            print("Listening for assistant name...\n" + str(assistant_names))
            recognizer.adjust_for_ambient_noise(source)
            try:
                audio_data = recognizer.listen(source, timeout=15, phrase_time_limit=15)
            except sr.WaitTimeoutError:
                # Nobody spoke within the timeout: just keep listening.
                continue

            assistant_name, detected_language, name_detected, text_detected = (
                detect_assistant_name(audio_data)
            )
            if not assistant_name:
                continue

            print(f"Assistant name detected: {assistant_name} ({name_detected})")
            question = _strip_wake_phrases(text_detected, detected_language)
            if len(question) > 3:
                return (audio_data, detected_language, name_detected)

            print("Recording question or task...")
            recognizer.adjust_for_ambient_noise(source)
            try:
                question_audio_data = recognizer.listen(source, timeout=1)
            except sr.WaitTimeoutError:
                # No question followed the wake word; wait for the next one.
                continue
            return (question_audio_data, detected_language, "")


def clear_history_if_timeout(system_description):
    """Reset a stale conversation and make sure the system prompt is in place.

    The history is cleared after ``HISTORY_TIMEOUT_SECONDS`` without an
    interaction. Afterwards the first history entry is guaranteed to be the
    system message built from *system_description*, so the prompt is present
    on the very first request and follows a change of assistant language.
    """
    if time.time() - last_interaction_timestamp > HISTORY_TIMEOUT_SECONDS:
        conversation_history.clear()
        print("Conversation history cleared.")

    system_message = {"role": "system", "content": f"{system_description}"}
    if not conversation_history:
        conversation_history.append(system_message)
    elif conversation_history[0] != system_message:
        conversation_history[0] = system_message


def transcribe_and_call_chatgpt(audio_data, language, name_to_remove_b):
    """Transcribe the question in *audio_data* and return ChatGPT's answer.

    Args:
        audio_data: Recording of the question.
        language: Key of ``assistant_names`` ("English", "Polish", "Spanish").
        name_to_remove_b: Kept for interface compatibility; every wake phrase
            of *language* is stripped from the transcript regardless.

    Returns:
        The stripped text of the model's reply.

    Raises:
        sr.UnknownValueError: The audio was unintelligible or contained
            nothing besides the wake word.
        sr.RequestError: The speech recognition service could not be reached.
    """
    global last_interaction_timestamp

    # The recognizer expects a language tag such as "pl-PL", not "Polish".
    transcribed_text = recognizer.recognize_google(
        audio_data, language=SPEECH_LANGUAGE_CODES[language]
    )
    transcribed_text = _strip_wake_phrases(transcribed_text, language)
    print(f"Transcribed text: {transcribed_text}")
    if not transcribed_text:
        raise sr.UnknownValueError()

    # Clear history if the timeout has passed
    clear_history_if_timeout(CHARACTER_ESSENCE[language])

    # Update the conversation history with the user message
    conversation_history.append({"role": "user", "content": f"{transcribed_text}"})

    # Call the ChatGPT API
    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=conversation_history,
        )
    except Exception:
        # Do not leave an unanswered user message in the history.
        conversation_history.pop()
        raise

    # Extract the response text
    chatgpt_response = response.choices[0].message['content'].strip()
    print(f"ChatGPT Response: {chatgpt_response}")

    # Update the conversation history with the assistant's response
    conversation_history.append({"role": "assistant", "content": chatgpt_response})

    # Update the last interaction timestamp
    last_interaction_timestamp = time.time()
    return chatgpt_response


def detect_and_execute_script(chatgpt_response, language):
    """Run the code contained in *chatgpt_response* and speak the remainder.

    The first language marker found (checked in the order Shell, AppleScript,
    Python, Node.js) selects the interpreter. Responses without a marker are
    read out loud unchanged.
    """
    runners = (
        ("**Shell**", execute_shell_script),
        ("**AppleScript**", execute_apple_script),
        ("**Python**", execute_python_script),
        ("**Node.js**", execute_nodejs_script),
    )
    response_text = chatgpt_response
    for marker, run in runners:
        if marker not in chatgpt_response:
            continue
        code, response_text = extract_code(chatgpt_response, marker)
        if code:
            try:
                # NOTE: this executes model-generated code without review.
                run(code)
            except OSError as e:
                # E.g. the interpreter is not installed; still speak the reply.
                print(f"Could not run {marker} script; {e}")
        break
    read_response_aloud(response_text.strip(), language)


def extract_code(response, code_marker):
    """Split *response* into the fenced code after *code_marker* and the rest.

    Returns:
        ``(code, response_text)``: the contents of the first code fence that
        follows the marker (an optional language tag on the fence line is
        dropped) and the text after the closing fence. When no complete fence
        is found, ``code`` is empty and ``response_text`` is the response
        without the marker.
    """
    marker_position = response.find(code_marker)
    search_from = 0 if marker_position == -1 else marker_position + len(code_marker)
    fence = _CODE_FENCE_RE.search(response, search_from)
    if fence is None:
        return "", response.replace(code_marker, "").strip()
    return fence.group(1).strip(), response[fence.end():].strip()


def _run_temp_script(label, script, suffix, command):
    """Write *script* to a temporary file, run ``command + [path]``, delete it."""
    print(f"{label}: {script}")
    with tempfile.NamedTemporaryFile(
        "w", delete=False, suffix=suffix, encoding="utf-8"
    ) as script_file:
        script_file.write(script)
        script_path = script_file.name
    # The file is closed (and therefore flushed) before the interpreter reads it.
    try:
        subprocess.run([*command, script_path])
    finally:
        os.unlink(script_path)


def execute_python_script(script):
    """Run *script* with the interpreter that is running this program."""
    _run_temp_script("Python", script, ".py", [sys.executable])


def execute_apple_script(script):
    """Run *script* with ``osascript``."""
    _run_temp_script("AppleScript", script, ".aps", ["osascript"])


def execute_shell_script(script):
    """Run *script* with bash.

    The script is handed to the shell explicitly because executing the file
    directly fails when the model leaves out the shebang line.
    """
    _run_temp_script("Shell", script, ".sh", ["/bin/bash"])


def execute_nodejs_script(script):
    """Run *script* with ``node``."""
    _run_temp_script("Node.js", script, ".js", ["node"])


def read_response_aloud(response, language):
    """Speak *response* in *language* using gTTS and pygame.

    Nothing is spoken for an empty response (gTTS rejects empty text), which
    is the normal case when the model answered with code only.
    """
    if not response.strip():
        return

    language_codes = {
        "English": "en",
        "Polish": "pl",
        "Spanish": "es",
    }
    tts = gTTS(response, lang=language_codes[language])

    # Use a private temp file rather than "response.mp3" in the working directory.
    file_descriptor, audio_path = tempfile.mkstemp(suffix=".mp3")
    os.close(file_descriptor)
    try:
        tts.save(audio_path)

        # Initialize pygame mixer
        pygame.mixer.init()
        pygame.mixer.music.load(audio_path)
        pygame.mixer.music.play()

        # Wait for the sound to finish playing
        clock = pygame.time.Clock()
        while pygame.mixer.music.get_busy():
            clock.tick(10)
    finally:
        # Clean up
        os.remove(audio_path)


def main():
    """Run the listen / ask / act loop until the process is interrupted."""
    while True:
        # Listen for the assistant name and record the question or task
        question_audio_data, detected_language, name_to_remove = listen_for_assistant()

        # Transcribe the recorded audio and call the ChatGPT API
        try:
            chatgpt_response = transcribe_and_call_chatgpt(
                question_audio_data, detected_language, name_to_remove
            )
        except sr.UnknownValueError:
            print("Could not understand the question.")
            continue
        except sr.RequestError as e:
            print(f"Could not request results from Google Speech Recognition service; {e}")
            continue
        except openai.error.OpenAIError as e:
            print(f"ChatGPT request failed; {e}")
            continue

        # Detect and execute the script or read out loud the response
        detect_and_execute_script(chatgpt_response, detected_language)


if __name__ == "__main__":
    main()

Chatgpt Assistant