{"payload":{"pageCount":1,"repositories":[{"type":"Public","name":"opencompass","owner":"open-compass","isFork":false,"description":"OpenCompass is an LLM evaluation platform, supporting a wide range of models (Llama3, Mistral, InternLM2,GPT-4,LLaMa2, Qwen,GLM, Claude, etc) over 100+ datasets.","topicNames":["benchmark","evaluation","openai","llm","chatgpt","large-language-model","llama2","llama3"],"topicsNotShown":0,"allTopics":["benchmark","evaluation","openai","llm","chatgpt","large-language-model","llama2","llama3"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":16,"issueCount":96,"starsCount":2793,"forksCount":296,"license":"Apache License 2.0","participation":[0,0,0,0,0,17,33,16,7,15,11,24,19,16,20,16,12,19,0,5,7,5,14,11,17,15,14,7,18,11,12,12,13,11,15,8,15,0,4,7,12,11,9,4,3,4,6,8,19,8,14,6],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-22T13:44:28.044Z"}},{"type":"Public","name":"VLMEvalKit","owner":"open-compass","isFork":false,"description":"Open-source evaluation toolkit of large vision-language models (LVLMs), support GPT-4v, Gemini, QwenVLPlus, 40+ HF models, 20+ benchmarks","topicNames":["computer-vision","evaluation","pytorch","gemini","openai","vqa","vit","gpt","multi-modal","clip"],"topicsNotShown":9,"allTopics":["computer-vision","evaluation","pytorch","gemini","openai","vqa","vit","gpt","multi-modal","clip","claude","openai-api","gpt4","large-language-models","llm","chatgpt","llava","qwen","gpt-4v"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":3,"issueCount":13,"starsCount":479,"forksCount":54,"license":"Apache License 2.0","participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,73,30,19,15,26,35,17,26,28,23,10,2,1,10,0,8,13,8,9,8,20,32,14,16,8,22],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-22T08:51:07.030Z"}},{"type":"Public","name":"CodeBench","owner":"open-compass","isFork":false,"description":"","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":null,"pullRequestCount":0,"issueCount":0,"starsCount":1,"forksCount":0,"license":null,"participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-21T11:38:31.710Z"}},{"type":"Public","name":"MathBench","owner":"open-compass","isFork":false,"description":"[ACL 2024 Findings] MathBench: A Comprehensive Multi-Level Difficulty Mathematics Evaluation Dataset","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":null,"pullRequestCount":1,"issueCount":1,"starsCount":41,"forksCount":1,"license":"Apache License 2.0","participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,4,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,9],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-21T08:38:33.675Z"}},{"type":"Public","name":"Ada-LEval","owner":"open-compass","isFork":false,"description":"The official implementation of \"Ada-LEval: Evaluating long-context LLMs with length-adaptable benchmarks\"","topicNames":["gpt4","llm","long-context"],"topicsNotShown":0,"allTopics":["gpt4","llm","long-context"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":1,"starsCount":43,"forksCount":2,"license":null,"participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,41,2,5,3,2,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-04-22T09:36:27.993Z"}},{"type":"Public","name":".github","owner":"open-compass","isFork":false,"description":"","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":null,"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":1,"license":null,"participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,2,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,2,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-04-16T11:21:20.928Z"}},{"type":"Public","name":"DevBench","owner":"open-compass","isFork":false,"description":"A Comprehensive Benchmark for Software Development.","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":2,"issueCount":1,"starsCount":67,"forksCount":4,"license":"Apache License 2.0","participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,20,6,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-04-04T10:40:51.870Z"}},{"type":"Public","name":"T-Eval","owner":"open-compass","isFork":false,"description":"[ACL2024] T-Eval: Evaluating Tool Utilization Capability of Large Language Models Step by Step","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":2,"issueCount":26,"starsCount":166,"forksCount":10,"license":"Apache License 2.0","participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,7,5,19,4,2,0,0,0,6,3,3,0,0,0,1,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-04-03T21:05:37.907Z"}},{"type":"Public","name":"human-eval","owner":"open-compass","isFork":true,"description":"Code for the paper \"Evaluating Large Language Models Trained on Code\"","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":1,"forksCount":293,"license":"MIT License","participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-03-14T11:55:53.553Z"}},{"type":"Public","name":"OpenFinData","owner":"open-compass","isFork":false,"description":"","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":null,"pullRequestCount":0,"issueCount":1,"starsCount":14,"forksCount":1,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-03-08T06:32:20.542Z"}},{"type":"Public","name":"CriticBench","owner":"open-compass","isFork":false,"description":"A comprehensive benchmark for evaluating critique ability of LLMs","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":21,"forksCount":1,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-02-24T01:31:08.418Z"}},{"type":"Public","name":"code-evaluator","owner":"open-compass","isFork":false,"description":"A multi-language code evaluation tool.","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":1,"issueCount":0,"starsCount":16,"forksCount":6,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-01-26T04:12:27.117Z"}},{"type":"Public","name":"MMBench","owner":"open-compass","isFork":false,"description":"Official Repo of \"MMBench: Is Your Multi-modal Model an All-around Player?\"","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":null,"pullRequestCount":0,"issueCount":8,"starsCount":99,"forksCount":5,"license":"Apache License 2.0","participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,5,0,0,2,0,0,0,1,0,0,1,0,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-01-08T09:59:44.556Z"}},{"type":"Public","name":"evalplus","owner":"open-compass","isFork":true,"description":"EvalPlus for rigourous evaluation of LLM-synthesized code","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":82,"license":"Apache License 2.0","participation":[0,7,0,3,9,0,0,2,2,0,4,0,2,3,3,0,0,0,3,0,10,21,6,3,7,1,17,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-12-20T08:13:21.996Z"}},{"type":"Public","name":"MixtralKit","owner":"open-compass","isFork":false,"description":"A toolkit for inference and evaluation of 'mixtral-8x7b-32kseqlen' from Mistral AI","topicNames":["moe","mistral","llm"],"topicsNotShown":0,"allTopics":["moe","mistral","llm"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":12,"starsCount":758,"forksCount":81,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-12-15T19:10:55.603Z"}},{"type":"Public","name":"LawBench","owner":"open-compass","isFork":false,"description":"Benchmarking Legal Knowledge of Large Language Models","topicNames":["law","benchmark","llm","chatgpt"],"topicsNotShown":0,"allTopics":["law","benchmark","llm","chatgpt"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":4,"starsCount":184,"forksCount":27,"license":"Apache License 2.0","participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,18,39,20,9,12,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-11-13T06:42:45.191Z"}},{"type":"Public","name":"BotChat","owner":"open-compass","isFork":false,"description":"Evaluating LLMs' multi-round chatting capability via assessing conversations generated by two LLM instances.","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Jupyter Notebook","color":"#DA5B0B"},"pullRequestCount":0,"issueCount":1,"starsCount":102,"forksCount":4,"license":"Apache License 2.0","participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,11,11,0,5,4,0,5,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-11-02T12:38:19.203Z"}},{"type":"Public","name":"pytorch_sphinx_theme","owner":"open-compass","isFork":true,"description":"Sphinx Theme for OpenCompass - Modified from PyTorch","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"CSS","color":"#563d7c"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":128,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-08-30T04:20:35.062Z"}}],"repositoryCount":18,"userInfo":null,"searchable":true,"definitions":[],"typeFilters":[{"id":"all","text":"All"},{"id":"public","text":"Public"},{"id":"source","text":"Sources"},{"id":"fork","text":"Forks"},{"id":"archived","text":"Archived"},{"id":"template","text":"Templates"}],"compactMode":false},"title":"Repositories"}