import os
from transformers import AutoModelForCausalLM, AutoTokenizer

# Make CUDA errors surface at the failing kernel call, which eases debugging.
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'


class Qwen:
    def __init__(self, model_path="Qwen/Qwen-1_8B-Chat") -> None:
        '''Not writing an API version for now; it would be similar to Linly-api, feel free to implement one if interested.'''
        self.model, self.tokenizer = self.init_model(model_path)
        # Optional system prompt prepended to every question.
        self.prompt = ""
        # Holds the fully formatted prompt for the most recent request.
        self.data = {}

    def init_model(self, path="Qwen/Qwen-1_8B-Chat"):
        model = AutoModelForCausalLM.from_pretrained(path,
                                                     device_map="auto",
                                                     trust_remote_code=True).eval()
        tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
        return model, tokenizer

    def chat(self, question):
        self.data["question"] = f"{self.prompt} ### Instruction:{question} ### Response:"
        try:
            # Qwen's trust_remote_code chat() returns (response, updated_history).
            response, history = self.model.chat(self.tokenizer, self.data["question"], history=None)
            print(history)
            return response
        except Exception:
            return "对不起,你的请求出错了,请再次尝试。\nSorry, your request has encountered an error. Please try again.\n"


def test():
    llm = Qwen(model_path="Qwen/Qwen-1_8B-Chat")
    answer = llm.chat("如何应对压力?")  # "How do I cope with stress?"
    print(answer)


if __name__ == '__main__':
    test()
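
# --- Hedged sketch: a minimal HTTP API version, as the class docstring invites. ---
# This is only an illustration, not the actual Linly-api protocol: the /chat route,
# the ChatRequest schema, and the default host/port are assumptions made here.
# Requires `pip install fastapi uvicorn`.
def serve(host="0.0.0.0", port=8000):
    from fastapi import FastAPI
    from pydantic import BaseModel
    import uvicorn

    llm = Qwen()  # load the model once at startup
    app = FastAPI()

    class ChatRequest(BaseModel):
        question: str

    @app.post("/chat")
    def api_chat(req: ChatRequest):
        # Delegate to the same chat() method exercised by test().
        return {"answer": llm.chat(req.question)}

    uvicorn.run(app, host=host, port=port)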