{"payload":{"feedbackUrl":"https://github.com/orgs/community/discussions/53140","repo":{"id":176982014,"defaultBranch":"main","name":"Megatron-LM","ownerLogin":"NVIDIA","currentUserCanPush":false,"isFork":false,"isEmpty":false,"createdAt":"2019-03-21T16:15:52.000Z","ownerAvatar":"https://avatars.githubusercontent.com/u/1728152?v=4","public":true,"private":false,"isOrgOwned":true},"refInfo":{"name":"","listCacheKey":"v0:1715376377.0","currentOid":""},"activityList":{"items":[{"before":"f5f3be5dc5aa7dc84f190c1fefacb096d6c224f8","after":"0650d8335d45162845398a97880374b81c4d84b1","ref":"refs/heads/master","pushedAt":"2024-05-23T18:37:05.000Z","pushType":"push","commitsCount":42,"pusher":{"login":"jaredcasper","name":"Jared Casper","path":"/jaredcasper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/155158?s=80&v=4"},"commit":{"message":"Merge branch 'lmcafee/retro-mcore-0.7' into 'main'\n\nRetro bugfixes for Mcore 0.7\n\nSee merge request ADLR/megatron-lm!1460","shortMessageHtmlLink":"Merge branch 'lmcafee/retro-mcore-0.7' into 'main'"}},{"before":"c3677e09aa4e2eec37048307bd795928b8f8324a","after":"0650d8335d45162845398a97880374b81c4d84b1","ref":"refs/heads/main","pushedAt":"2024-05-23T18:36:58.000Z","pushType":"push","commitsCount":36,"pusher":{"login":"jaredcasper","name":"Jared Casper","path":"/jaredcasper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/155158?s=80&v=4"},"commit":{"message":"Merge branch 'lmcafee/retro-mcore-0.7' into 'main'\n\nRetro bugfixes for Mcore 0.7\n\nSee merge request ADLR/megatron-lm!1460","shortMessageHtmlLink":"Merge branch 'lmcafee/retro-mcore-0.7' into 'main'"}},{"before":"c1e631517c70a3ba61a2d1a678c330fb13557f99","after":"dace0330ac8b5bbc3a21ff14f7ef7544abb16334","ref":"refs/heads/core_r0.7.0","pushedAt":"2024-05-23T17:51:42.000Z","pushType":"push","commitsCount":5,"pusher":{"login":"ericharper","name":"Eric Harper","path":"/ericharper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/11999610?s=80&v=4"},"commit":{"message":"Merge branch 'cherry-pick-858f22e2' into 'core_r0.7.0'\n\nMerge branch '158-fix-the-typo-in-topk_with_capacity' into 'main'\n\nSee merge request ADLR/megatron-lm!1450","shortMessageHtmlLink":"Merge branch 'cherry-pick-858f22e2' into 'core_r0.7.0'"}},{"before":"ac8a7e586a9d0cc41a49ab0c63ea5d1877f69025","after":"c3677e09aa4e2eec37048307bd795928b8f8324a","ref":"refs/heads/main","pushedAt":"2024-05-14T16:01:15.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"jaredcasper","name":"Jared Casper","path":"/jaredcasper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/155158?s=80&v=4"},"commit":{"message":"Merge branch 'dnarayanan/workaround_for_cublas_error' into 'main'\n\nWorkaround for TE bug where it can pick the wrong cuBLAS algorithm\n\nSee merge request ADLR/megatron-lm!1436","shortMessageHtmlLink":"Merge branch 'dnarayanan/workaround_for_cublas_error' into 'main'"}},{"before":"f5f3be5dc5aa7dc84f190c1fefacb096d6c224f8","after":"ac8a7e586a9d0cc41a49ab0c63ea5d1877f69025","ref":"refs/heads/main","pushedAt":"2024-05-13T22:05:06.000Z","pushType":"push","commitsCount":4,"pusher":{"login":"jaredcasper","name":"Jared Casper","path":"/jaredcasper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/155158?s=80&v=4"},"commit":{"message":"Merge branch 'mblaz/log-debug-verbosity' into 'main'\n\nDecrease fully parallel save/load logging verbosity\n\nSee merge request ADLR/megatron-lm!1419","shortMessageHtmlLink":"Merge branch 'mblaz/log-debug-verbosity' into 'main'"}},{"before":"b7b98ba28db132f064b4cef3f8e0ba598dc3404b","after":"c1e631517c70a3ba61a2d1a678c330fb13557f99","ref":"refs/heads/core_r0.7.0","pushedAt":"2024-05-10T22:40:02.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"jaredcasper","name":"Jared Casper","path":"/jaredcasper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/155158?s=80&v=4"},"commit":{"message":"Merge branch 'mike/1427_into_0.7' into 'core_r0.7.0'\n\nMerge !1427 into branch core_r0.7.0\n\nSee merge request ADLR/megatron-lm!1434","shortMessageHtmlLink":"Merge branch 'mike/1427_into_0.7' into 'core_r0.7.0'"}},{"before":"db3a3f79d1cda60ea4b3db0ceffcf20c5760e11d","after":"f5f3be5dc5aa7dc84f190c1fefacb096d6c224f8","ref":"refs/heads/master","pushedAt":"2024-05-10T22:39:43.000Z","pushType":"push","commitsCount":30,"pusher":{"login":"jaredcasper","name":"Jared Casper","path":"/jaredcasper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/155158?s=80&v=4"},"commit":{"message":"Merge branch 'mike/token_counting_behind_argument' into 'main'\n\nPut Per-Token-Cross-Entropy calculation behind an argument\n\nSee merge request ADLR/megatron-lm!1427","shortMessageHtmlLink":"Merge branch 'mike/token_counting_behind_argument' into 'main'"}},{"before":"db3a3f79d1cda60ea4b3db0ceffcf20c5760e11d","after":"f5f3be5dc5aa7dc84f190c1fefacb096d6c224f8","ref":"refs/heads/main","pushedAt":"2024-05-10T22:39:36.000Z","pushType":"push","commitsCount":30,"pusher":{"login":"jaredcasper","name":"Jared Casper","path":"/jaredcasper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/155158?s=80&v=4"},"commit":{"message":"Merge branch 'mike/token_counting_behind_argument' into 'main'\n\nPut Per-Token-Cross-Entropy calculation behind an argument\n\nSee merge request ADLR/megatron-lm!1427","shortMessageHtmlLink":"Merge branch 'mike/token_counting_behind_argument' into 'main'"}},{"before":null,"after":"b7b98ba28db132f064b4cef3f8e0ba598dc3404b","ref":"refs/heads/core_r0.7.0","pushedAt":"2024-05-10T21:26:17.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"ericharper","name":"Eric Harper","path":"/ericharper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/11999610?s=80&v=4"},"commit":{"message":"update version\n\nSigned-off-by: eharper ","shortMessageHtmlLink":"update version"}},{"before":"ebb1484327af4eb73ee923bf736e44db72aa1831","after":null,"ref":"refs/tags/InstructRetro","pushedAt":"2024-05-03T19:33:45.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"maanug-nv","name":"Maanu Grover","path":"/maanug-nv","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/109391026?s=80&v=4"}},{"before":"0d983e64afcd84cab83124e0b7ca89a3d8ec9655","after":"db3a3f79d1cda60ea4b3db0ceffcf20c5760e11d","ref":"refs/heads/master","pushedAt":"2024-05-03T00:23:11.000Z","pushType":"push","commitsCount":111,"pusher":{"login":"jaredcasper","name":"Jared Casper","path":"/jaredcasper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/155158?s=80&v=4"},"commit":{"message":"Merge branch 'boxiangw/mlperf-option-add-one-extra-token' into 'main'\n\n[MLPerf] GPT dataset features: drop last partial validation sequence, drop extra token, return sample with 1s loss mask, mock dataset testing\n\nSee merge request ADLR/megatron-lm!1223","shortMessageHtmlLink":"Merge branch 'boxiangw/mlperf-option-add-one-extra-token' into 'main'"}},{"before":"0d983e64afcd84cab83124e0b7ca89a3d8ec9655","after":"db3a3f79d1cda60ea4b3db0ceffcf20c5760e11d","ref":"refs/heads/main","pushedAt":"2024-05-03T00:23:05.000Z","pushType":"push","commitsCount":111,"pusher":{"login":"jaredcasper","name":"Jared Casper","path":"/jaredcasper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/155158?s=80&v=4"},"commit":{"message":"Merge branch 'boxiangw/mlperf-option-add-one-extra-token' into 'main'\n\n[MLPerf] GPT dataset features: drop last partial validation sequence, drop extra token, return sample with 1s loss mask, mock dataset testing\n\nSee merge request ADLR/megatron-lm!1223","shortMessageHtmlLink":"Merge branch 'boxiangw/mlperf-option-add-one-extra-token' into 'main'"}},{"before":"432683220e5b0eddce2ec0a251c3a0b16cdbff61","after":"3c0f3ef38c645043be18e51ce2698b37eaceb128","ref":"refs/heads/core_r0.7.0.beta","pushedAt":"2024-05-01T12:31:27.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"maanug-nv","name":"Maanu Grover","path":"/maanug-nv","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/109391026?s=80&v=4"},"commit":{"message":"Merge branch 'rachitg/ag' into 'core_r0.7.0.beta'\n\nallow disabling qkv or fc1 overlap\n\nSee merge request ADLR/megatron-lm!1398","shortMessageHtmlLink":"Merge branch 'rachitg/ag' into 'core_r0.7.0.beta'"}},{"before":"85a3a1599231fe07012109dc38fdde0c30061d8e","after":"0d983e64afcd84cab83124e0b7ca89a3d8ec9655","ref":"refs/heads/master","pushedAt":"2024-04-26T17:56:44.000Z","pushType":"push","commitsCount":94,"pusher":{"login":"jaredcasper","name":"Jared Casper","path":"/jaredcasper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/155158?s=80&v=4"},"commit":{"message":"Merge branch 'dnarayanan/check_param_hashes' into 'main'\n\nCompute hashes on each rank, and compare across DP replicas\n\nSee merge request ADLR/megatron-lm!1368","shortMessageHtmlLink":"Merge branch 'dnarayanan/check_param_hashes' into 'main'"}},{"before":"ccfeda47cb5ca10ee3c4efd9b78c6bb15c2cd3d2","after":"0d983e64afcd84cab83124e0b7ca89a3d8ec9655","ref":"refs/heads/main","pushedAt":"2024-04-26T17:56:30.000Z","pushType":"push","commitsCount":10,"pusher":{"login":"jaredcasper","name":"Jared Casper","path":"/jaredcasper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/155158?s=80&v=4"},"commit":{"message":"Merge branch 'dnarayanan/check_param_hashes' into 'main'\n\nCompute hashes on each rank, and compare across DP replicas\n\nSee merge request ADLR/megatron-lm!1368","shortMessageHtmlLink":"Merge branch 'dnarayanan/check_param_hashes' into 'main'"}},{"before":"fa5fed80fdb4120aa7d3ec8bd68ab4baec5c4fdf","after":"432683220e5b0eddce2ec0a251c3a0b16cdbff61","ref":"refs/heads/core_r0.7.0.beta","pushedAt":"2024-04-24T22:27:58.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"maanug-nv","name":"Maanu Grover","path":"/maanug-nv","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/109391026?s=80&v=4"},"commit":{"message":"Merge branch 'jbaczek/extend_transformer_block_spec' into 'core_r0.7.0.beta'\n\nAdd layer norm to TransformerBlockSubmodules\n\nSee merge request ADLR/megatron-lm!1350","shortMessageHtmlLink":"Merge branch 'jbaczek/extend_transformer_block_spec' into 'core_r0.7.…"}},{"before":"d9abf064f82f3f768b4f75a30704f01374370328","after":"ccfeda47cb5ca10ee3c4efd9b78c6bb15c2cd3d2","ref":"refs/heads/main","pushedAt":"2024-04-18T22:09:08.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"jaredcasper","name":"Jared Casper","path":"/jaredcasper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/155158?s=80&v=4"},"commit":{"message":"Merge branch 'fix_overlap_param_gather' into 'main'\n\nfix EP distopt with overlap param gather\n\nSee merge request ADLR/megatron-lm!1345","shortMessageHtmlLink":"Merge branch 'fix_overlap_param_gather' into 'main'"}},{"before":"b26d3e36a7d65e1562f6e6d2e6c18a24624f27e5","after":"cac60ce4c8203e2ed18912be63d4dd577b46830d","ref":"refs/heads/core_r0.6.0","pushedAt":"2024-04-18T22:08:46.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"jaredcasper","name":"Jared Casper","path":"/jaredcasper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/155158?s=80&v=4"},"commit":{"message":"Merge branch 'fix_overlap_param_gather' into 'main'\n\nfix EP distopt with overlap param gather\n\nSee merge request ADLR/megatron-lm!1345\n\n(cherry picked from commit ccfeda47cb5ca10ee3c4efd9b78c6bb15c2cd3d2)\n\nac93d847 fix EP distopt with overlap param gather\nbb7b4307 change golden metrics\n0ff731ff Minor fix to thrown value error","shortMessageHtmlLink":"Merge branch 'fix_overlap_param_gather' into 'main'"}},{"before":"299f96ffe61a4bae9044a2082570b19b94d13335","after":"b26d3e36a7d65e1562f6e6d2e6c18a24624f27e5","ref":"refs/heads/core_r0.6.0","pushedAt":"2024-04-18T19:28:34.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"jaredcasper","name":"Jared Casper","path":"/jaredcasper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/155158?s=80&v=4"},"commit":{"message":"Merge branch 'cherry-pick-ad354e08' into 'core_r0.6.0'\n\nCherry-pick pip package fix into core_r0.6.0\n\nSee merge request ADLR/megatron-lm!1361","shortMessageHtmlLink":"Merge branch 'cherry-pick-ad354e08' into 'core_r0.6.0'"}},{"before":"2196398f5252ead6f036b06d45f7acb89b1308da","after":"d9abf064f82f3f768b4f75a30704f01374370328","ref":"refs/heads/main","pushedAt":"2024-04-18T19:28:03.000Z","pushType":"push","commitsCount":16,"pusher":{"login":"jaredcasper","name":"Jared Casper","path":"/jaredcasper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/155158?s=80&v=4"},"commit":{"message":"Merge branch 'squared_relu_fusion' into 'main'\n\nAdded fusion for squared relu\n\nSee merge request ADLR/megatron-lm!1363","shortMessageHtmlLink":"Merge branch 'squared_relu_fusion' into 'main'"}},{"before":"67bfe999ea3edfc64022f144a314ae7a48adcae5","after":"fa5fed80fdb4120aa7d3ec8bd68ab4baec5c4fdf","ref":"refs/heads/core_r0.7.0.beta","pushedAt":"2024-04-18T00:12:30.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"maanug-nv","name":"Maanu Grover","path":"/maanug-nv","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/109391026?s=80&v=4"},"commit":{"message":"Merge branch 'amax_group_fix' into 'core_r0.7.0.beta'\n\nChange AMAX reduction group to use TP group\n\nSee merge request ADLR/megatron-lm!1362","shortMessageHtmlLink":"Merge branch 'amax_group_fix' into 'core_r0.7.0.beta'"}},{"before":null,"after":"67bfe999ea3edfc64022f144a314ae7a48adcae5","ref":"refs/heads/core_r0.7.0.beta","pushedAt":"2024-04-16T23:40:48.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"maanug-nv","name":"Maanu Grover","path":"/maanug-nv","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/109391026?s=80&v=4"},"commit":{"message":"Update package information","shortMessageHtmlLink":"Update package information"}},{"before":"caf2007e080d65dd7488be7bd409b366e225ab5f","after":"2196398f5252ead6f036b06d45f7acb89b1308da","ref":"refs/heads/main","pushedAt":"2024-04-12T23:05:09.000Z","pushType":"push","commitsCount":6,"pusher":{"login":"jaredcasper","name":"Jared Casper","path":"/jaredcasper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/155158?s=80&v=4"},"commit":{"message":"Merge branch 'rachitg/fp8dpa' into 'main'\n\nRachitg/fp8dpa\n\nSee merge request ADLR/megatron-lm!1332","shortMessageHtmlLink":"Merge branch 'rachitg/fp8dpa' into 'main'"}},{"before":"fbb375d4b5e88ce52f5f7125053068caff47f93f","after":"caf2007e080d65dd7488be7bd409b366e225ab5f","ref":"refs/heads/main","pushedAt":"2024-04-12T22:07:38.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"jaredcasper","name":"Jared Casper","path":"/jaredcasper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/155158?s=80&v=4"},"commit":{"message":"Merge branch 'maanug/gen-test-scripts' into 'main'\n\nLocal JET test script generator\n\nSee merge request ADLR/megatron-lm!1315","shortMessageHtmlLink":"Merge branch 'maanug/gen-test-scripts' into 'main'"}},{"before":"b5aba3a2f3165da8b4f6b483bf3a6da2a24718e4","after":"fbb375d4b5e88ce52f5f7125053068caff47f93f","ref":"refs/heads/main","pushedAt":"2024-04-12T19:09:38.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"jaredcasper","name":"Jared Casper","path":"/jaredcasper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/155158?s=80&v=4"},"commit":{"message":"Merge branch 'jxin/new_tp_dp_mapping' into 'main'\n\nSupport alternative mapping TP->PP->DP\n\nSee merge request ADLR/megatron-lm!1183","shortMessageHtmlLink":"Merge branch 'jxin/new_tp_dp_mapping' into 'main'"}},{"before":"3b25065a9033fde23227d7d8bb84cebdfdcf9562","after":"b5aba3a2f3165da8b4f6b483bf3a6da2a24718e4","ref":"refs/heads/main","pushedAt":"2024-04-12T16:36:52.000Z","pushType":"push","commitsCount":18,"pusher":{"login":"jaredcasper","name":"Jared Casper","path":"/jaredcasper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/155158?s=80&v=4"},"commit":{"message":"Merge branch 'boxiangw/mlperf-parallel-group-init-fix' into 'main'\n\n[MLPerf] Bug fix of parallel group init\n\nSee merge request ADLR/megatron-lm!1314","shortMessageHtmlLink":"Merge branch 'boxiangw/mlperf-parallel-group-init-fix' into 'main'"}},{"before":"f9349eb033b13f68bcf71952fdf4b2004a5e4300","after":"299f96ffe61a4bae9044a2082570b19b94d13335","ref":"refs/heads/core_r0.6.0","pushedAt":"2024-04-12T00:46:50.000Z","pushType":"push","commitsCount":5,"pusher":{"login":"jaredcasper","name":"Jared Casper","path":"/jaredcasper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/155158?s=80&v=4"},"commit":{"message":"Merge branch 'lmcafee/retro-converge-0.6' into 'main'\n\nRemove vocab_size in pretrain_retro.py.\n\nSee merge request ADLR/megatron-lm!1329\n\n(cherry picked from commit 3b25065a9033fde23227d7d8bb84cebdfdcf9562)\n\nacacd62f remove vocab_size.\n0ad72e07 clean up.","shortMessageHtmlLink":"Merge branch 'lmcafee/retro-converge-0.6' into 'main'"}},{"before":"f3a3020031f384ddafd9b7e9f3a587798c0aea21","after":"3b25065a9033fde23227d7d8bb84cebdfdcf9562","ref":"refs/heads/main","pushedAt":"2024-04-12T00:46:27.000Z","pushType":"push","commitsCount":4,"pusher":{"login":"jaredcasper","name":"Jared Casper","path":"/jaredcasper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/155158?s=80&v=4"},"commit":{"message":"Merge branch 'lmcafee/retro-converge-0.6' into 'main'\n\nRemove vocab_size in pretrain_retro.py.\n\nSee merge request ADLR/megatron-lm!1329","shortMessageHtmlLink":"Merge branch 'lmcafee/retro-converge-0.6' into 'main'"}},{"before":"7fe863f3d94f7b64a927b04b85f5c9339d3fb784","after":"f3a3020031f384ddafd9b7e9f3a587798c0aea21","ref":"refs/heads/main","pushedAt":"2024-04-11T00:03:18.000Z","pushType":"push","commitsCount":10,"pusher":{"login":"jaredcasper","name":"Jared Casper","path":"/jaredcasper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/155158?s=80&v=4"},"commit":{"message":"Merge branch 'anmolg/disable_grad_reduce' into 'main'\n\noption to disable grad reduce for column parallel linear layer\n\nSee merge request ADLR/megatron-lm!1328","shortMessageHtmlLink":"Merge branch 'anmolg/disable_grad_reduce' into 'main'"}},{"before":"ba773259dbe5735fbd91ca41e7f4ded60b335c52","after":"7fe863f3d94f7b64a927b04b85f5c9339d3fb784","ref":"refs/heads/main","pushedAt":"2024-04-09T20:22:02.000Z","pushType":"push","commitsCount":8,"pusher":{"login":"jaredcasper","name":"Jared Casper","path":"/jaredcasper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/155158?s=80&v=4"},"commit":{"message":"Merge branch 'hn-tokenizer-imports' into 'main'\n\nFix some more imports from the refactor\n\nSee merge request ADLR/megatron-lm!1325","shortMessageHtmlLink":"Merge branch 'hn-tokenizer-imports' into 'main'"}}],"hasNextPage":true,"hasPreviousPage":false,"activityType":"all","actor":null,"timePeriod":"all","sort":"DESC","perPage":30,"cursor":"djE6ks8AAAAEUlfDSwA","startCursor":null,"endCursor":null}},"title":"Activity · NVIDIA/Megatron-LM"}