{"payload":{"feedbackUrl":"https://github.com/orgs/community/discussions/53140","repo":{"id":619198044,"defaultBranch":"main","name":"st-moe-pytorch","ownerLogin":"lucidrains","currentUserCanPush":false,"isFork":false,"isEmpty":false,"createdAt":"2023-03-26T15:00:16.000Z","ownerAvatar":"https://avatars.githubusercontent.com/u/108653?v=4","public":true,"private":false,"isOrgOwned":false},"refInfo":{"name":"","listCacheKey":"v0:1717541711.0","currentOid":""},"activityList":{"items":[{"before":"6b7f7fbb93610134c902efdfe096e06fe5a7d6b5","after":"d7669d43789a01c498511e0f8471eef350745d3e","ref":"refs/heads/main","pushedAt":"2024-06-04T22:55:08.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"0.1.8","shortMessageHtmlLink":"0.1.8"}},{"before":"09c14b26c9c6a34409ab1c6a68cc39522e3c4350","after":"6b7f7fbb93610134c902efdfe096e06fe5a7d6b5","ref":"refs/heads/main","pushedAt":"2024-02-29T15:30:49.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"remove erroneous backwards for split_by_rank","shortMessageHtmlLink":"remove erroneous backwards for split_by_rank"}},{"before":null,"after":"7b72bbe49d6c2c18fb6241324453d2ff2b86d4d4","ref":"refs/heads/fix-split-by-rank","pushedAt":"2024-02-29T15:08:21.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"fix","shortMessageHtmlLink":"fix"}},{"before":"8eb41cc552d7e2c28bd41225064c464f19c1948a","after":"09c14b26c9c6a34409ab1c6a68cc39522e3c4350","ref":"refs/heads/main","pushedAt":"2024-01-24T19:25:22.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"cleanup","shortMessageHtmlLink":"cleanup"}},{"before":"1957771199b37fd1cb96aee8138b86f868ec276e","after":"8eb41cc552d7e2c28bd41225064c464f19c1948a","ref":"refs/heads/main","pushedAt":"2024-01-24T14:05:31.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"address https://github.com/lucidrains/st-moe-pytorch/issues/4","shortMessageHtmlLink":"address #4"}},{"before":"51727d007b09c01936d4a365ad0aa6569c530cf0","after":"1957771199b37fd1cb96aee8138b86f868ec276e","ref":"refs/heads/main","pushedAt":"2023-12-14T20:14:48.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"make sure contiguous","shortMessageHtmlLink":"make sure contiguous"}},{"before":"d9f5f0891115ecd31290d92f510d56625eb2d417","after":"51727d007b09c01936d4a365ad0aa6569c530cf0","ref":"refs/heads/main","pushedAt":"2023-09-21T15:14:04.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"router z loss should be calculated on the unnoised gating logits","shortMessageHtmlLink":"router z loss should be calculated on the unnoised gating logits"}},{"before":"b7ace1b28698c51745859c5f0c9f402cc7abb8df","after":"d9f5f0891115ecd31290d92f510d56625eb2d417","ref":"refs/heads/main","pushedAt":"2023-09-21T04:12:03.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"allow for noising of gates","shortMessageHtmlLink":"allow for noising of gates"}},{"before":"977ee5500c000b47f270d6cc4ccd33bef380ca0f","after":"b7ace1b28698c51745859c5f0c9f402cc7abb8df","ref":"refs/heads/main","pushedAt":"2023-09-11T21:44:36.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"readme","shortMessageHtmlLink":"readme"}},{"before":"5d5f07140b2cb1ea95e9f0acb93ba7397d3c5ad2","after":"977ee5500c000b47f270d6cc4ccd33bef380ca0f","ref":"refs/heads/main","pushedAt":"2023-09-11T21:43:48.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"researcher will want to log the unweighted auxiliary losses","shortMessageHtmlLink":"researcher will want to log the unweighted auxiliary losses"}},{"before":"240414a414085e4ff18ca1b26d48effe4e8ba139","after":"5d5f07140b2cb1ea95e9f0acb93ba7397d3c5ad2","ref":"refs/heads/main","pushedAt":"2023-09-11T21:42:18.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"rename loss_coef to balance_loss_coef, sum the balance and router z-loss and return the total auxiliary loss and add some comments in readme on what to do with it","shortMessageHtmlLink":"rename loss_coef to balance_loss_coef, sum the balance and router z-l…"}},{"before":"2bb762dee16c813edee1603bf0e42ef5778e7ece","after":"240414a414085e4ff18ca1b26d48effe4e8ba139","ref":"refs/heads/main","pushedAt":"2023-09-11T19:19:25.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"validate that it works on single machine with multiple GPUs","shortMessageHtmlLink":"validate that it works on single machine with multiple GPUs"}},{"before":"6982343e2003b4497ba0f7cb1b3c657dc59753bf","after":"2bb762dee16c813edee1603bf0e42ef5778e7ece","ref":"refs/heads/main","pushedAt":"2023-09-11T15:52:13.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"handle variable sequence lengths if `allow_var_seq_len = True` on `Experts`","shortMessageHtmlLink":"handle variable sequence lengths if allow_var_seq_len = True on `Ex…"}},{"before":"bd72d52f4226459c2a228f82838b2352c7313e90","after":"6982343e2003b4497ba0f7cb1b3c657dc59753bf","ref":"refs/heads/main","pushedAt":"2023-09-11T01:09:23.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"project manage","shortMessageHtmlLink":"project manage"}},{"before":"00be346078d75de78d765131293e7665234ff440","after":"bd72d52f4226459c2a228f82838b2352c7313e90","ref":"refs/heads/main","pushedAt":"2023-09-10T20:18:15.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"any combination of number of experts and world size should not break","shortMessageHtmlLink":"any combination of number of experts and world size should not break"}},{"before":"52b5c8a779dcd9764ac68911d0a7f3290ed2ac5f","after":"00be346078d75de78d765131293e7665234ff440","ref":"refs/heads/main","pushedAt":"2023-09-10T20:09:29.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"any combinatino of number of experts and world size should not break","shortMessageHtmlLink":"any combinatino of number of experts and world size should not break"}},{"before":"83d75b83a3dba5e929104a0c041ab7568cbc2897","after":"52b5c8a779dcd9764ac68911d0a7f3290ed2ac5f","ref":"refs/heads/main","pushedAt":"2023-09-10T16:59:08.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"oops","shortMessageHtmlLink":"oops"}},{"before":"54188734ff338ba5e30b2e4fd51c9dd623782678","after":"83d75b83a3dba5e929104a0c041ab7568cbc2897","ref":"refs/heads/main","pushedAt":"2023-09-10T16:25:42.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"chip away at edge cases","shortMessageHtmlLink":"chip away at edge cases"}},{"before":"666d2fd41734f25e517b25e980419b80e023d00f","after":"54188734ff338ba5e30b2e4fd51c9dd623782678","ref":"refs/heads/main","pushedAt":"2023-09-10T15:04:55.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"another micro optimization for communication","shortMessageHtmlLink":"another micro optimization for communication"}},{"before":"2e272df8b756ef3fe98286482fdbeed338d8c716","after":"666d2fd41734f25e517b25e980419b80e023d00f","ref":"refs/heads/main","pushedAt":"2023-09-10T14:50:49.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"in split by rank function, cache the sizes so on backwards there is not an extra call","shortMessageHtmlLink":"in split by rank function, cache the sizes so on backwards there is n…"}},{"before":"085d5118dad9047a4698d6346d51ff97210204c8","after":"2e272df8b756ef3fe98286482fdbeed338d8c716","ref":"refs/heads/main","pushedAt":"2023-09-09T16:16:33.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"spell it out","shortMessageHtmlLink":"spell it out"}},{"before":"1f63c54be553be385cf01d0438576cee379938c0","after":"085d5118dad9047a4698d6346d51ff97210204c8","ref":"refs/heads/main","pushedAt":"2023-09-09T16:10:45.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"start journeying into distributed mixture of experts implementation","shortMessageHtmlLink":"start journeying into distributed mixture of experts implementation"}},{"before":"97a56888e373363c6c5cd1211701939f76967ab3","after":"1f63c54be553be385cf01d0438576cee379938c0","ref":"refs/heads/main","pushedAt":"2023-08-26T00:23:11.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"project management","shortMessageHtmlLink":"project management"}},{"before":"22dfd4da93d60e17948cbf751842c7014de027e3","after":"97a56888e373363c6c5cd1211701939f76967ab3","ref":"refs/heads/main","pushedAt":"2023-08-26T00:14:10.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"add ability to use differentiable topk","shortMessageHtmlLink":"add ability to use differentiable topk"}},{"before":"f9b8ce34a08c1515b4230c9607e86107bb938b23","after":"22dfd4da93d60e17948cbf751842c7014de027e3","ref":"refs/heads/main","pushedAt":"2023-08-21T22:46:25.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"allow for different thresholds between second and third expert","shortMessageHtmlLink":"allow for different thresholds between second and third expert"}},{"before":"166c41c207dd9a08ec952284e3734357d4e27ecb","after":"f9b8ce34a08c1515b4230c9607e86107bb938b23","ref":"refs/heads/main","pushedAt":"2023-08-21T22:09:09.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"multiply gates by mask_flat twice, as in mesh tensorflow code for top-n gating","shortMessageHtmlLink":"multiply gates by mask_flat twice, as in mesh tensorflow code for top…"}},{"before":"96b7309fb5f44292d81bda77a4e1bcdbca6c7a10","after":"166c41c207dd9a08ec952284e3734357d4e27ecb","ref":"refs/heads/main","pushedAt":"2023-08-21T19:43:23.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"cleanup and project management","shortMessageHtmlLink":"cleanup and project management"}},{"before":"a501ec519f862df34ec64decce06efcadcff8b4a","after":"96b7309fb5f44292d81bda77a4e1bcdbca6c7a10","ref":"refs/heads/main","pushedAt":"2023-08-21T18:39:43.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"show example with entire mixture-of-experts block","shortMessageHtmlLink":"show example with entire mixture-of-experts block"}},{"before":"1ca8170a6bc2ae94a8aa32a40aff91895d7f696a","after":"a501ec519f862df34ec64decce06efcadcff8b4a","ref":"refs/heads/main","pushedAt":"2023-08-21T18:34:22.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"tweak","shortMessageHtmlLink":"tweak"}},{"before":"1c4a2aa030bb15299490afeea922bea46b2ae16f","after":"1ca8170a6bc2ae94a8aa32a40aff91895d7f696a","ref":"refs/heads/main","pushedAt":"2023-08-21T18:33:06.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"better naming","shortMessageHtmlLink":"better naming"}}],"hasNextPage":true,"hasPreviousPage":false,"activityType":"all","actor":null,"timePeriod":"all","sort":"DESC","perPage":30,"cursor":"djE6ks8AAAAEXIcR4gA","startCursor":null,"endCursor":null}},"title":"Activity · lucidrains/st-moe-pytorch"}