[{"title":"( 47 个子文件 6.36MB ) 算法部署-使用TensorRT-LLM部署大模型-附详细优化+分析流程教程-优质大模型部署项目实战.zip","children":[{"title":"TensorRT-LLM-ChatGLM3-main","children":[{"title":"app.py <span style='color:#111;'> 5.38KB </span>","children":null,"spread":false},{"title":"vLLM","children":[{"title":"results.txt <span style='color:#111;'> 750B </span>","children":null,"spread":false},{"title":"langchang_chatglm3_vllm.py <span style='color:#111;'> 405B </span>","children":null,"spread":false},{"title":"chatglm3_quant_awq.py <span style='color:#111;'> 773B </span>","children":null,"spread":false},{"title":"offline_chatglm3.py <span style='color:#111;'> 564B </span>","children":null,"spread":false},{"title":"prompts.txt <span style='color:#111;'> 22B </span>","children":null,"spread":false},{"title":"model_repo","children":[{"title":"vllm_model","children":[{"title":"config.pbtxt <span style='color:#111;'> 1.70KB </span>","children":null,"spread":false},{"title":"1","children":[{"title":"model.json <span style='color:#111;'> 247B </span>","children":null,"spread":false}],"spread":true}],"spread":true}],"spread":true},{"title":"client.py <span style='color:#111;'> 8.08KB </span>","children":null,"spread":false}],"spread":true},{"title":"Triton大模型部署.pdf <span style='color:#111;'> 7.41MB </span>","children":null,"spread":false},{"title":"tensorrt_llm","children":[{"title":"run_hf.py <span style='color:#111;'> 1.94KB </span>","children":null,"spread":false},{"title":"utils.py <span style='color:#111;'> 3.78KB </span>","children":null,"spread":false},{"title":"__init__.py <span style='color:#111;'> 0B </span>","children":null,"spread":false},{"title":"quantize.py <span style='color:#111;'> 5.64KB </span>","children":null,"spread":false},{"title":"see_chatglm3_model.py <span style='color:#111;'> 406B </span>","children":null,"spread":false},{"title":"process.py <span style='color:#111;'> 1.39KB </span>","children":null,"spread":false},{"title":"smoothquant.py <span style='color:#111;'> 5.14KB </span>","children":null,"spread":false},{"title":"requirements.txt <span style='color:#111;'> 75B </span>","children":null,"spread":false},{"title":"run_chat_trt.py <span style='color:#111;'> 7.83KB </span>","children":null,"spread":false},{"title":"build.py <span style='color:#111;'> 28.32KB </span>","children":null,"spread":false},{"title":"weight.py <span style='color:#111;'> 24.33KB </span>","children":null,"spread":false},{"title":"visualize.py <span style='color:#111;'> 2.70KB </span>","children":null,"spread":false}],"spread":false},{"title":"langchain_chatglm3.py <span style='color:#111;'> 4.21KB </span>","children":null,"spread":false},{"title":"triton_inference_server","children":[{"title":"model_repo","children":[{"title":"postprocessing","children":[{"title":"config.pbtxt <span style='color:#111;'> 2.85KB </span>","children":null,"spread":false},{"title":"1","children":[{"title":"model.py <span style='color:#111;'> 9.15KB </span>","children":null,"spread":false},{"title":"__pycache__","children":[{"title":"model.cpython-310.pyc <span style='color:#111;'> 4.79KB </span>","children":null,"spread":false}],"spread":true}],"spread":true}],"spread":true},{"title":"ensemble","children":[{"title":"config.pbtxt <span style='color:#111;'> 9.47KB </span>","children":null,"spread":false}],"spread":true},{"title":"tensorrt_llm","children":[{"title":"config.pbtxt <span style='color:#111;'> 8.03KB </span>","children":null,"spread":false}],"spread":true},{"title":"tensorrt_llm_bls","children":[{"title":"config.pbtxt <span style='color:#111;'> 4.46KB </span>","children":null,"spread":false},{"title":"1","children":[{"title":"model.py <span style='color:#111;'> 15.16KB </span>","children":null,"spread":false},{"title":"__pycache__","children":[{"title":"model.cpython-310.pyc <span style='color:#111;'> 7.00KB </span>","children":null,"spread":false}],"spread":false}],"spread":true}],"spread":true},{"title":"preprocessing","children":[{"title":"config.pbtxt <span style='color:#111;'> 3.54KB </span>","children":null,"spread":false},{"title":"1","children":[{"title":"model.py <span style='color:#111;'> 14.70KB </span>","children":null,"spread":false},{"title":"__pycache__","children":[{"title":"model.cpython-310.pyc <span style='color:#111;'> 8.53KB </span>","children":null,"spread":false}],"spread":false}],"spread":true}],"spread":true}],"spread":true}],"spread":true},{"title":"img","children":[{"title":"content.jpg <span style='color:#111;'> 92.08KB </span>","children":null,"spread":false},{"title":"face.jpg <span style='color:#111;'> 43.81KB </span>","children":null,"spread":false}],"spread":true},{"title":"service","children":[{"title":"knowledge_service.py <span style='color:#111;'> 3.09KB </span>","children":null,"spread":false},{"title":"utils.py <span style='color:#111;'> 3.78KB </span>","children":null,"spread":false},{"title":"__init__.py <span style='color:#111;'> 58B </span>","children":null,"spread":false},{"title":"chatglm_service.py <span style='color:#111;'> 1.51KB </span>","children":null,"spread":false},{"title":"chatglm_triton_service.py <span style='color:#111;'> 9.33KB </span>","children":null,"spread":false},{"title":"chatglm_trtllm_service.py <span style='color:#111;'> 6.90KB </span>","children":null,"spread":false},{"title":"config.py <span style='color:#111;'> 593B </span>","children":null,"spread":false}],"spread":true},{"title":"langchain_chatglm3_triton.py <span style='color:#111;'> 4.11KB </span>","children":null,"spread":false},{"title":"end_to_end_grpc_client.py <span style='color:#111;'> 11.98KB </span>","children":null,"spread":false},{"title":"requirements.txt <span style='color:#111;'> 304B </span>","children":null,"spread":false},{"title":"README.md <span style='color:#111;'> 591B </span>","children":null,"spread":false}],"spread":false}],"spread":true}]