#!/usr/bin/env python
# : out ava
# : dep transformers
# : dep torch
# : dep accelerate
# : dep bitsandbytes
import transformers
import torch
import sys

# import sleekxmpp

model_name = "lmsys/vicuna-33b-v1.3"

if torch.cuda.is_available():
    device = "cuda:0"
else:
    # exit with an error message and a nonzero status
    sys.exit("no cuda")

tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    load_in_8bit=True,
    pad_token_id=tokenizer.eos_token_id,
    revision="float16",
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
)

# TODO: set attention_mask and pad_token_id explicitly


def gen(txt):
    # Tokenize the prompt and move it to the GPU selected above
    input_ids = tokenizer(txt, return_tensors="pt").input_ids.to(device)
    outputs = model.generate(
        input_ids=input_ids,
        max_length=1024,
        do_sample=True,  # sampling must be enabled for temperature to take effect
        temperature=0.7,
    )
    result = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    return "".join(result)


# Get user input and generate a response
while True:
    user_input = input("ben: ")
    response = gen(user_input)
    print("bot: ", response)

"""
# Set up the XMPP client
client = sleekxmpp.ClientXMPP("ava@simatime.com", "test")


# Define a function that takes a user's input and returns a response
def generate_response(input_text):
    # The language model would generate the response here
    response = "This is a response to the user's input: " + input_text
    return response


# Handle incoming messages. add_event_handler is not a decorator, so the
# handler is registered explicitly, and processing starts only after
# registration, since process(block=True) blocks forever.
def handle_message(message):
    user_input = message["body"]
    response = generate_response(user_input)
    message.reply(response).send()


client.add_event_handler("message", handle_message)
client.connect()
client.process(block=True)
"""
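
# A minimal sketch of the same integration on slixmpp, the maintained
# successor to sleekxmpp. This is an untested outline, not the
# implementation: the JID/password are the placeholders from the sketch
# above, AvaBot is a hypothetical name, and gen() is assumed to be the
# function defined in this file.
"""
import slixmpp


class AvaBot(slixmpp.ClientXMPP):
    def __init__(self, jid, password):
        super().__init__(jid, password)
        self.add_event_handler("session_start", self.start)
        self.add_event_handler("message", self.message)

    async def start(self, event):
        # Announce presence and fetch the roster on login
        self.send_presence()
        await self.get_roster()

    def message(self, msg):
        # Reply to direct chats with a model completion
        if msg["type"] in ("chat", "normal"):
            msg.reply(gen(msg["body"])).send()


bot = AvaBot("ava@simatime.com", "test")
bot.connect()
bot.process(forever=True)
"""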