os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"Qwen/Qwen2-7B-Instruct", #"/mnt/data/xuhu/qwen/Qwen2-7B-Instruct"
tokenizer = AutoTokenizer.from_pretrained("/mnt/data/xuhu/qwen/Qwen2-7B-Instruct")