From 1c8f9338bb14d6aa0345aab3d88f59e6b3794579 Mon Sep 17 00:00:00 2001 From: "yanyuxiyangzk@126.com" Date: Wed, 3 Apr 2024 15:50:59 +0800 Subject: [PATCH] =?UTF-8?q?vllm=E6=96=87=E6=A1=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- echo.html | 2 +- llm/README.md | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 llm/README.md diff --git a/echo.html b/echo.html index f687c4e..bb117f8 100644 --- a/echo.html +++ b/echo.html @@ -29,7 +29,7 @@ $(document).ready(function() { var host = window.location.hostname - var ws = new WebSocket("ws://"+host+":8000/humanecho"); + var ws = new WebSocket("ws://"+host+":8000/humanchat"); //document.getElementsByTagName("video")[0].setAttribute("src", aa["video"]); ws.onopen = function() { console.log('Connected'); diff --git a/llm/README.md b/llm/README.md new file mode 100644 index 0000000..f67e07e --- /dev/null +++ b/llm/README.md @@ -0,0 +1,8 @@ +1、推理加速 +conda create -n vllm python=3.10 +conda install pytorch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 pytorch-cuda=12.1 -c pytorch -c nvidia + + +python -m vllm.entrypoints.openai.api_server --tensor-parallel-size=1 --trust-remote-code --max-model-len 1024 --model THUDM/chatglm3-6b + +python -m vllm.entrypoints.openai.api_server --host 127.0.0.1 --port 8101 --tensor-parallel-size=1 --trust-remote-code --max-model-len 1024 --model THUDM/chatglm3-6b \ No newline at end of file