calc_llm_list.py
text = """
| Meta | [LLaMA](https://github.com/facebookresearch/llama) | en | - | LLaMA-13B outperforms GPT-3 (175B) and LLaMA-65B is competitive with PaLM-540B.<br />Base model for most follow-up works. |
| @ggerganov | [llama.cpp](https://github.com/ggerganov/llama.cpp) | en | LLaMA | C/C++ implementation of LLaMA and some other models, using quantization. |
| Stanford | [Alpaca](https://github.com/tatsu-lab/stanford_alpaca) | en | LLaMA/OPT | uses 52K instruction-following examples generated with Self-Instruct techniques to fine-tune 7B LLaMA;<br /> the resulting model, Alpaca, behaves similarly to the `text-davinci-003` model on the Self-Instruct instruction-following evaluation suite.<br />Alpaca has inspired many follow-up models. |
| LianJiaTech | [BELLE](https://github.com/LianjiaTech/BELLE) | en/zh | BLOOMZ-7B1-mt | perhaps the first Chinese model to follow Alpaca. |
| Tsinghua | [ChatGLM-6B](https://github.com/THUDM/ChatGLM-6B) | en/zh | GLM | well-known Chinese chat model that can run on a single GPU. |
| Databricks | [Dolly](https://github.com/databrickslabs/dolly) | en | GPT-J 6B | uses Alpaca data to fine-tune a 2-year-old model, GPT-J, which exhibits surprisingly high-quality<br /> instruction-following behavior not characteristic of the foundation model on which it is based. |
| @tloen | [Alpaca-LoRA](https://github.com/tloen/alpaca-lora) | en | LLaMA-7B | trained within hours on a single RTX 4090,<br />reproducing the [Stanford Alpaca](https://github.com/tatsu-lab/stanford_alpaca) results using [low-rank adaptation (LoRA)](https://arxiv.org/pdf/2106.09685.pdf),<br />and can run on a Raspberry Pi. |
| ColossalAI | [ColossalChat](https://github.com/hpcaitech/ColossalAI/blob/main/applications/Chat/README.md) | en/zh | LLaMA-7B | provides a unified large language model framework, including:<br />Supervised datasets collection<br />Supervised instructions fine-tuning<br />Reward model training<br />RLHF<br />Quantization inference<br />Fast model deploying<br />Perfectly integrated with the Hugging Face ecosystem |
| Shanghai AI Lab | [LLaMA-Adapter](https://github.com/ZrrSkywalker/LLaMA-Adapter) | en | LLaMA-7B | Fine-tuning LLaMA to follow instructions within 1 Hour and 1.2M Parameters |
| PhoebusSi | [Alpaca-CoT](https://github.com/PhoebusSi/Alpaca-CoT) | en/zh | LLaMA<br />ChatGLM-6B<br />BLOOM | extends Alpaca with CoT data to boost its reasoning ability;<br />aims to build an instruction fine-tuning (IFT) platform with an extensive instruction collection (especially the CoT datasets)<br /> and a unified interface for various large language models. |
| AetherCortex | [Llama-X](https://github.com/AetherCortex/Llama-X) | en | LLaMA | Open Academic Research on Improving LLaMA to SOTA LLM |
| TogetherComputer | [OpenChatKit](https://github.com/togethercomputer/OpenChatKit) | en | GPT-NeoX-20B | OpenChatKit provides a powerful, open-source base to create both specialized and general-purpose chatbots for various applications.<br /> The kit includes an instruction-tuned language model, a moderation model, and an extensible retrieval system for including <br />up-to-date responses from custom repositories. |
| nomic-ai | [GPT4All](https://github.com/nomic-ai/gpt4all) | en | LLaMA | trained on a massive collection of clean assistant data including code, stories and dialogue |
| @ymcui | [Chinese-LLaMA-Alpaca](https://github.com/ymcui/Chinese-LLaMA-Alpaca) | en/zh | LLaMA-7B/13B | expands the Chinese vocabulary of the original LLaMA and uses Chinese data for secondary pre-training,<br /> further enhancing basic Chinese semantic understanding. Additionally, the project fine-tunes on Chinese instruction data<br /> on top of the Chinese LLaMA, significantly improving the model's understanding and execution of instructions. |
| UC Berkeley<br />Stanford<br />CMU | [Vicuna](https://github.com/lm-sys/FastChat) | en | LLaMA-13B | Impressing GPT-4 with 90% ChatGPT Quality. |
| @NouamaneTazi | [bloomz.cpp](https://github.com/NouamaneTazi/bloomz.cpp) | en/zh | BLOOM | C++ implementation for BLOOM inference. |
| HKUST | [LMFlow](https://github.com/OptimalScale/LMFlow) / [RAFT](https://optimalscale.github.io/LMFlow/examples/raft.html) | en/zh | LLaMA<br />Galactica<br />GPT-2<br />... | LMFlow is an extensible, convenient, and efficient toolbox for fine-tuning large machine learning models, designed to be user-friendly,<br /> speedy, reliable, and accessible to the entire community.<br />RAFT is a new alignment algorithm that is more efficient than conventional (PPO-based) RLHF. |
| [Cerebras Systems](https://www.cerebras.net/) | [Cerebras-GPT](https://huggingface.co/cerebras/Cerebras-GPT-13B) | en | - | pretrained GPT-3-like LLM, commercially available, efficiently trained on the [Andromeda](https://www.cerebras.net/andromeda/) AI supercomputer,<br />trained in accordance with [Chinchilla scaling laws](https://arxiv.org/abs/2203.15556) (20 tokens per model parameter), which is compute-optimal. |
| UT Southwestern/<br />UIUC/OSU/HDU | [ChatDoctor](https://github.com/Kent0n-Li/ChatDoctor) | en | LLaMA | Maybe the first domain-specific chat model tuned on LLaMA. |
| LAION-AI | [Open Assistant](https://github.com/LAION-AI/Open-Assistant) | en | GPT-J<br />CodeGen<br />FlanT5<br />GPT-JT | a project meant to give everyone access to a great chat-based large language model. |
| UCSD/SYSU | [baize](https://github.com/project-baize/baize) | en<br />zh (coming soon) | LLaMA | fine-tuned with [LoRA](https://github.com/microsoft/LoRA). It uses 100k dialogs generated by letting ChatGPT chat with itself. <br />Alpaca's data is also used to improve its performance. |
| UC Berkeley | [Koala](https://github.com/young-geng/EasyLM) | en | LLaMA | rather than maximizing *quantity* by scraping as much web data as possible, the team focuses on collecting a small, *high-quality* dataset. |
| @imClumsyPanda | [langchain-ChatGLM](https://github.com/imClumsyPanda/langchain-ChatGLM) | en/zh | ChatGLM-6B | local-knowledge-based ChatGLM using LangChain. |
| @yangjianxin1 | [Firefly](https://github.com/yangjianxin1/Firefly) | zh | bloom-1b4-zh<br />bloom-2b6-zh | instruction tuning on Chinese datasets. Vocabulary pruning, ZeRO, and tensor parallelism<br /> are used to effectively reduce memory consumption and improve training efficiency. |
| microsoft | [GPT-4-LLM](https://github.com/Instruction-Tuning-with-GPT-4/GPT-4-LLM) | en/zh | LLaMA | aims to share data generated by GPT-4 for building instruction-following LLMs with supervised learning and reinforcement learning. |
| EleutherAI | [pythia](https://github.com/EleutherAI/pythia) | en | - | combines interpretability analysis and scaling laws to understand how knowledge develops<br /> and evolves during training in autoregressive transformers. |
| Hugging Face | [StackLLaMA](https://huggingface.co/trl-lib/llama-7b-se-rl-peft) | en | LLaMA | trained on StackExchange data; the main goal is to serve as a tutorial and walkthrough on<br /> how to train a model with RLHF, not primarily model performance. |
| Nebuly | [ChatLLaMA](https://github.com/nebuly-ai/nebullvm/tree/main/apps/accelerate/chatllama) | en | - | a library that allows you to create hyper-personalized ChatGPT-like assistants using your own data and the least amount of compute possible. |
| @juncongmoo | [ChatLLaMA](https://github.com/juncongmoo/chatllama) | en | LLaMA | LLaMA-based RLHF model, runnable on a single GPU. |
| @juncongmoo | [minichatgpt](https://github.com/juncongmoo/minichatgpt) | en | GPT/OPT ... | To Train ChatGPT In 5 Minutes with ColossalAI. |
| @LC1332 | [Luotuo-Chinese-LLM](https://github.com/LC1332/Luotuo-Chinese-LLM) | zh | LLaMA/ChatGLM | Instruction fine-tuned Chinese Language Models, with colab provided! |
| @Facico | [Chinese-Vicuna](https://github.com/Facico/Chinese-Vicuna) | zh | LLaMA | a Chinese instruction-following LLaMA-based model, fine-tuned with LoRA, with cpp inference supported and a colab provided. |
| @yanqiangmiffy | [InstructGLM](https://github.com/yanqiangmiffy/InstructGLM) | en/zh | ChatGLM-6B | ChatGLM-based instruction-following model, fine-tuned on a variety of data sources, supporting DeepSpeed acceleration and LoRA. |
| alibaba | [Wombat](https://github.com/GanjinZero/RRHF) | en | LLaMA | proposes a novel learning paradigm called RRHF as an alternative to RLHF; it scores responses generated by<br /> different sampling policies and learns to align them with human preferences through a ranking loss. The performance<br />is comparable to RLHF, with fewer models used in the process. |
| microsoft | [deepspeed-chat](https://github.com/microsoft/DeepSpeed/tree/master/blogs/deepspeed-chat) | - | - | Easy, Fast and Affordable RLHF Training of ChatGPT-like Models at All Scales. |
| @WuJunde | [alpaca-glassoff](https://github.com/WuJunde/alpaca-glassoff) | en | LLaMA | a mini chat AI that accepts image input and can run on your own laptop, based on [stanford-alpaca](https://github.com/tatsu-lab/stanford_alpaca) and [alpaca-lora](https://github.com/tloen/alpaca-lora). |
| Cambridge | [Visual Med-Alpaca](https://github.com/cambridgeltl/visual-med-alpaca) | en | LLaMA-7B | a multi-modal foundation model designed specifically for the biomedical domain |
| @JosephusCheung | [Guanaco](https://huggingface.co/datasets/JosephusCheung/GuanacoDataset) | en/zh/jp/de | LLaMA-7B | A Multilingual Instruction-Following Language Model |
| KAUST | [CAMEL](https://github.com/lightaime/camel) | en/zh/jp/de ... | LLaMA | a novel communicative agent framework named *role-playing*, using *inception prompting* to<br /> guide chat agents toward task completion while maintaining consistency with human intentions. |
| BaihaiAI | [IDPChat](https://github.com/BaihaiAI/IDPChat) | en/zh | LLaMA-13B<br />Stable-diffusion | Chinese multi-modal model, single GPU runnable, easy to deploy, UI provided. |
| BlinkDL | [ChatRWKV](https://github.com/BlinkDL/ChatRWKV) | en/zh | **RNN** | powered by RWKV (**100% RNN**), with training sponsored by Stability and EleutherAI. |
| @FreedomIntelligence | [LLM Zoo](https://github.com/FreedomIntelligence/LLMZoo) | multi | BLOOMZ/LLaMA | a project that provides data, models, and an evaluation benchmark for large language models.<br />Models released: Phoenix, Chimera. |
| KAUST | [MiniGPT-4](https://github.com/Vision-CAIR/MiniGPT-4) | en/zh | LLaMA | MiniGPT-4 aligns a frozen visual encoder from BLIP-2 with a frozen LLM, Vicuna, using just one projection layer,<br /> and yields many emerging vision-language capabilities similar to those demonstrated in GPT-4. |
| HIT | [Huatuo](https://github.com/SCIR-HI/Huatuo-Llama-Med-Chinese) / [ChatGLM-Med](https://github.com/SCIR-HI/Med-ChatGLM) | zh | LLaMA/ChatGLM | fine-tuned with a Chinese medical knowledge dataset generated using the GPT-3.5 API. |
| UW–Madison/MSR<br />/Columbia University | [LLaVA](https://github.com/haotian-liu/LLaVA) | en | LLaMA | visual instruction tuning is proposed, towards building large language and vision models with GPT-4 level capabilities. |
| Stability-AI | [StableLM](https://github.com/Stability-AI/StableLM) | en | - | Stability AI Language Models. |
| ShanghaiTech, etc | [DoctorGLM](https://github.com/xionghonglin/DoctorGLM) | en/zh | ChatGLM-6B | Chinese medical consultation model fine-tuned on ChatGLM-6B. |
| TogetherComputer | [RedPajama-Data](https://github.com/togethercomputer/RedPajama-Data) | en | - | an open-source recipe to reproduce the LLaMA training dataset. |
| FDU | [MOSS](https://github.com/OpenLMLab/MOSS) | en/zh | - | An open-source tool-augmented conversational language model from Fudan University. |
| ssymmetry & FDU | [BBT-2](https://bbt.ssymmetry.com/) | zh | - | 120B open-source LM. |
| Tsinghua AIR | [BioMedGPT-1.6B](https://github.com/BioFM/OpenBioMed) | en/zh | - | a pre-trained multi-modal molecular foundation model with 1.6B parameters that associates 2D molecular graphs with texts. |
"""
import re
from bean.llm import LLM
from datetime import date
import json
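# The LLM bean imported above lives elsewhere in this repo and is not shown in
# this file. A minimal sketch of what it presumably looks like, inferred from
# the fields passed in parse_line_to_json below and the "voted"/"vote_count"
# keys deleted in the commented-out export code (all names/types here are
# assumptions, not the repo's actual definition):
#
#     from datetime import date
#     from typing import Optional
#     from pydantic import BaseModel
#
#     class LLM(BaseModel):
#         id: int
#         name: str
#         vendor: str
#         intro: str
#         url: str
#         region: str
#         publish_time: Optional[date] = None
#         voted: bool = False
#         vote_count: int = 0
#
# A pydantic BaseModel would also explain the llm.json() call below.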
# Matches markdown links of the form [name](url).
PATTERN_LINK = re.compile(r"\[(.*?)\]\((.*?)\)")
# Sample table row, kept around for quick manual testing of parse_line_to_json.
line = "| Meta | [LLaMA](https://github.com/facebookresearch/llama) | en | - | LLaMA-13B outperforms GPT-3 (175B) and LLaMA-65B is competitive with PaLM-540B.<br />Base model for most follow-up works. |"
def parse_line_to_json(i: int, line: str):
    """Parse one markdown table row into the LLM bean's JSON representation.

    Columns after split("|"): [1] vendor, [2] markdown link "[name](url)",
    [3] language(s), [4] base model, [5] intro.
    """
    elements = line.split("|")
    elements = [e.strip() for e in elements]
    # print(elements)
    vendor = elements[1]
    name, url = PATTERN_LINK.findall(elements[2])[0]
    intro = elements[5]
    llm = LLM(id=i, name=name, vendor=vendor, intro=intro, url=url, region="", publish_time=None)
    return llm.json()
# print(parse_line_to_json(1, line))
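# For the sample row above, parse_line_to_json(1, line) should yield JSON along
# these lines (exact keys depend on the LLM bean, so treat this as a sketch):
#     {"id": 1, "name": "LLaMA", "vendor": "Meta",
#      "url": "https://github.com/facebookresearch/llama",
#      "intro": "LLaMA-13B outperforms ...", "region": "", "publish_time": null, ...}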
# names = []
# for line in text.splitlines():
#     if not line: continue
#     elements = line.split("|")
#     elements = [e.strip() for e in elements]
#     print(elements[2])
#     name, _ = PATTERN_LINK.findall(elements[2])[0]
#     names.append(name)
# print(names)
# llms = []
# for i, line in enumerate(text.splitlines()):
#     if not line: continue
#     obj = json.loads(parse_line_to_json(i, line))
#     del obj["voted"]
#     del obj["vote_count"]
#     llms.append(obj)
# json.dump(llms, open("llms.json", "w", encoding="utf-8"), ensure_ascii=False, indent=2)
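# The commented-out block above is presumably what originally produced llms.json,
# which the __main__ block at the bottom of this file then post-processes.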
# def get_github_repo_create_time(github_url: str):
#     import requests
#     import json
#     api_url = "https://api.github.com/repos/" + github_url.split("github.com/")[1]
#     resp = requests.get(api_url)
#     if resp.status_code != 200:
#         return None
#     data = json.loads(resp.text)
#     return data["created_at"]
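# Note: unauthenticated calls to api.github.com are rate-limited (roughly 60
# requests per hour), so dating every repo in the table in one pass would
# likely need an authenticated request, e.g. (the token is a placeholder):
#     resp = requests.get(api_url, headers={"Authorization": "token <YOUR_PAT>"})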
if __name__ == "__main__":
    import json
    import re

    PATTERN_MD_LINK = re.compile(r"\[(.*?)\]\((.*?)\)")
    llms = json.load(open("llms.json", encoding="utf-8"))
    for llm in llms:
        intro = llm["intro"]
        # Replace markdown links [name](url) with <a href="url">name</a>
        intro = PATTERN_MD_LINK.sub(r'<a href="\2">\1</a>', intro)
        llm["intro"] = intro
    json.dump(llms, open("llms.json", "w", encoding="utf-8"), ensure_ascii=False, indent=2)
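    # A quick sanity check of the substitution on a standalone string (an
    # assumed example; it does not touch llms.json):
    sample = "fine-tuned with [LoRA](https://github.com/microsoft/LoRA)."
    expected = 'fine-tuned with <a href="https://github.com/microsoft/LoRA">LoRA</a>.'
    assert PATTERN_MD_LINK.sub(r'<a href="\2">\1</a>', sample) == expected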