# 05. Running the InternLM Large Models on 8 GB of GPU Memory
# 1. Deploy InternLM2-Chat-1.8B with a CLI demo and generate a 300-character short story
To load the 1.8B model, first prepare the environment. For the environment initialization steps, see the tutorial 轻松玩转书生·浦语大模型趣味 Demo.
```bash
# Initialize the environment (skip this if it has already been done)
studio-conda -o internlm-base -t demo
conda activate demo
pip install huggingface-hub==0.17.3
pip install transformers==4.34
pip install psutil==5.9.8
pip install accelerate==0.24.1
pip install streamlit==1.32.2
pip install matplotlib==3.8.3
pip install modelscope==1.9.5
pip install sentencepiece==0.1.99
```
Next, download the model: save the following code to a Python file and run it.
```python
import os
from modelscope.hub.snapshot_download import snapshot_download

# Create the directory the model will be saved to
# os.system("mkdir /root/models")
# save_dir is the local directory the model is downloaded into
save_dir = "/root/models"
model = snapshot_download("Shanghai_AI_Laboratory/internlm2-chat-1_8b",
                          cache_dir=save_dir,
                          revision='v1.1.0')
print(f"model:{model}")
```
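Assuming the script above is saved as download_model.py (the filename is my choice), run it with `python3 download_model.py`. A quick sanity check that the weights landed where the next step expects them might look like this:

```python
# Hypothetical post-download check (not part of the original script):
# list the snapshot directory that cli_demo.py below will load from.
import os

model_dir = "/root/models/Shanghai_AI_Laboratory/internlm2-chat-1_8b"
print(os.listdir(model_dir))  # expect config.json, tokenizer files and weight shards
```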
Once the model files have finished downloading, start the demo with the following script, cli_demo.py:
```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Path the model was downloaded to in the previous step
model_name_or_path = "/root/models/Shanghai_AI_Laboratory/internlm2-chat-1_8b"

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True, device_map='cuda:0')
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, trust_remote_code=True, torch_dtype=torch.bfloat16, device_map='cuda:0')
model = model.eval()

system_prompt = """You are an AI assistant whose name is InternLM (书生·浦语).
- InternLM (书生·浦语) is a conversational language model that is developed by Shanghai AI Laboratory (上海人工智能实验室). It is designed to be helpful, honest, and harmless.
- InternLM (书生·浦语) can understand and communicate fluently in the language chosen by the user such as English and 中文.
"""

messages = [(system_prompt, '')]

print("=============Welcome to InternLM chatbot, type 'exit' to exit.=============")

while True:
    input_text = input("\nUser >>> ")
    input_text = input_text.replace(' ', '')
    if input_text == "exit":
        break

    length = 0
    # Stream the reply token-by-token, printing only the newly generated suffix
    for response, _ in model.stream_chat(tokenizer, input_text, messages):
        if response is not None:
            print(response[length:], flush=True, end="")
            length = len(response)
```
Start command: `python3 cli_demo.py`
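To complete the assignment, type a prompt such as 请写一个300字的小故事 at the `User >>>` prompt and the model will stream the story back. For a non-interactive run, the sketch below is my addition: it assumes the `chat()` helper exposed by InternLM2's `trust_remote_code` implementation and reuses the `model` and `tokenizer` objects from cli_demo.py.

```python
# Minimal non-interactive sketch (an assumption, not from the original post):
# InternLM2's remote code is expected to provide model.chat(tokenizer, query, history),
# which returns (response, history) in one shot instead of streaming.
response, history = model.chat(tokenizer, "请写一个300字的小故事", history=[])
print(response)
```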
# 2. Deploy InternVL2-2B with LMDeploy
InternVL2 is the new generation of vision-language multimodal large models from Shanghai AI Laboratory, and the first open-source multimodal model whose overall performance rivals closed-source commercial models. The InternVL2 series spans everything from hundred-billion-parameter models down to small edge-side models, combines general and specialized capabilities, and supports multiple modalities.

Below, the InternVL2-2B model is deployed with LMDeploy. Model repository: InternVL2-2B.
```bash
# Install lmdeploy
conda activate demo
pip install lmdeploy[all]==0.5.1
pip install timm==1.0.7
```
Launching via Gradio

Command: `lmdeploy serve gradio /root/models/OpenGVLab/InternVL2-2B --cache-max-entry-count 0.1`

Here `--cache-max-entry-count 0.1` limits the KV cache to roughly 10% of GPU memory, to give the model a chance of fitting into 8 GB. After setting up port forwarding, uploading an image triggers a core dump; the cause has not been found yet.
Launching via the API server

Command: `lmdeploy serve api_server /share/new_models/OpenGVLab/InternVL2-2B --backend turbomind --server-port 23333 --chat-template chat_template.json`
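The post does not show the contents of chat_template.json. A minimal sketch, assuming it only needs to pin the built-in LMDeploy template name matching InternVL2's language model (the same name the server reports as its model ID in the result further down):

```python
# Assumed minimal chat_template.json: pin the built-in LMDeploy chat template.
# 'internvl-internlm2' matches the model name the server returns later on.
import json

with open("chat_template.json", "w") as f:
    json.dump({"model_name": "internvl-internlm2"}, f)
```

With the server up, the OpenAI-compatible client below sends it an image question: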
```python
from openai import OpenAI

client = OpenAI(api_key='YOUR_API_KEY', base_url='http://0.0.0.0:23333/v1')
model_name = client.models.list().data[0].id
print(f"modelName:{model_name}")

response = client.chat.completions.create(
    model=model_name,
    messages=[{
        'role': 'user',
        'content': [{
            'type': 'text',
            'text': '请介绍下这张图片',
        }, {
            'type': 'image_url',
            'image_url': {
                'url': 'https://modelscope.oss-cn-beijing.aliyuncs.com/resource/tiger.jpeg',
            },
        }],
    }],
    temperature=0.8,
    top_p=0.8)
print(response)
```
Inference fails with:

```text
torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 226.00 MiB (GPU 0; 7.99 GiB total capacity; 733.00 MiB already allocated; 66.00 MiB free; 798.00 MiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
```
Cause: the 8 GB GPU runs out of memory. After later upgrading the compute quota and running inference on a 24 GB GPU, the result is returned normally:
```text
ChatCompletion(id='2', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='这张图片展示了一只老虎。老虎是一种大型猫科动物,拥有黑色和白色相间的皮毛,通常生活在亚洲的森林和草原地区。老虎是顶级捕食者,以其迅捷和力量著称。它们通常以其他动物为食,包括鹿、鸟类、猴子和其他小型哺乳动物。\n\n图片中的老虎正躺在草地上,头部微微抬起,眼睛注视前方,显得非常警觉和威严。背景中的绿色草地和阳光为这张图片增添了自然的氛围。', role='assistant', function_call=None, tool_calls=None))], created=1722655862, model='internvl-internlm2', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=105, prompt_tokens=1845, total_tokens=1950))
```
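Before resorting to a larger GPU, it may also be worth shrinking the KV cache, as was done for the Gradio launch above. Below is a minimal sketch using LMDeploy's offline pipeline API (my addition; the classes are from lmdeploy 0.5.x, and the 0.1 ratio is an assumption, not a tested value):

```python
# Sketch: run InternVL2-2B through LMDeploy's pipeline with a reduced KV-cache
# ratio, to try to fit inference into less GPU memory before upgrading hardware.
from lmdeploy import pipeline, TurbomindEngineConfig
from lmdeploy.vl import load_image

pipe = pipeline(
    '/share/new_models/OpenGVLab/InternVL2-2B',
    backend_config=TurbomindEngineConfig(cache_max_entry_count=0.1),  # KV cache at ~10%
)
image = load_image('https://modelscope.oss-cn-beijing.aliyuncs.com/resource/tiger.jpeg')
print(pipe(('请介绍下这张图片', image)))
```

Equivalently, `--cache-max-entry-count 0.1` can be passed to the `lmdeploy serve api_server` command itself.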