{"payload":{"feedbackUrl":"https://github.com/orgs/community/discussions/53140","repo":{"id":631005427,"defaultBranch":"main","name":"mixture-of-attention","ownerLogin":"lucidrains","currentUserCanPush":false,"isFork":false,"isEmpty":false,"createdAt":"2023-04-21T17:14:56.000Z","ownerAvatar":"https://avatars.githubusercontent.com/u/108653?v=4","public":true,"private":false,"isOrgOwned":false},"refInfo":{"name":"","listCacheKey":"v0:1687560821.0","currentOid":""},"activityList":{"items":[{"before":"e43860bce51cd69150b979ef85b1e05e42a8bd58","after":"0ae023a6935703bac6ebc5f6284e60aa7b768881","ref":"refs/heads/main","pushedAt":"2023-07-16T18:53:30.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"note to self","shortMessageHtmlLink":"note to self"}},{"before":"1b69d47a780f13bfc7e713171fae53c19bdb00af","after":"e43860bce51cd69150b979ef85b1e05e42a8bd58","ref":"refs/heads/main","pushedAt":"2023-06-23T22:53:34.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"fix some issues with autoregressive attention when sequence is too short for queries to need routing, also bump colt5-attention due to a small bug in router","shortMessageHtmlLink":"fix some issues with autoregressive attention when sequence is too sh…"}},{"before":"cb38d47d4272112732606a028f458498c876bf28","after":"1b69d47a780f13bfc7e713171fae53c19bdb00af","ref":"refs/heads/main","pushedAt":"2023-06-23T21:59:54.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"add routed rotary embeddings","shortMessageHtmlLink":"add routed rotary embeddings"}},{"before":"a00c56836f7a406402d2539185cce2d6c0d1e52f","after":"cb38d47d4272112732606a028f458498c876bf28","ref":"refs/heads/main","pushedAt":"2023-05-25T03:43:09.842Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"use null token per expert, generations look much better now","shortMessageHtmlLink":"use null token per expert, generations look much better now"}},{"before":"f415d9f6f0ba3cc7d060e185d30d7c800b2fef24","after":"a00c56836f7a406402d2539185cce2d6c0d1e52f","ref":"refs/heads/main","pushedAt":"2023-05-25T02:01:31.541Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"make averaging routed tokens an option for non-autoregressive","shortMessageHtmlLink":"make averaging routed tokens an option for non-autoregressive"}},{"before":"dbd5a38bbe4b2d62232a8a4728c654e9fb2b6f9c","after":"f415d9f6f0ba3cc7d060e185d30d7c800b2fef24","ref":"refs/heads/main","pushedAt":"2023-05-25T01:51:29.918Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"make averaging the routed tokens through scatter mean an option","shortMessageHtmlLink":"make averaging the routed tokens through scatter mean an option"}},{"before":"e9944079e402d541e2e020f6f41b7f38368e06cd","after":"dbd5a38bbe4b2d62232a8a4728c654e9fb2b6f9c","ref":"refs/heads/main","pushedAt":"2023-05-25T00:26:51.207Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"update triton coor descent","shortMessageHtmlLink":"update triton coor descent"}},{"before":"aee85c00f3ed927c1a23900570c29f9b3ef75cc7","after":"e9944079e402d541e2e020f6f41b7f38368e06cd","ref":"refs/heads/main","pushedAt":"2023-05-25T00:08:09.847Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"use cosine sim routing","shortMessageHtmlLink":"use cosine sim routing"}},{"before":"2ce991e5e4b4c9a44e76cf120bc746cb0848e68a","after":"aee85c00f3ed927c1a23900570c29f9b3ef75cc7","ref":"refs/heads/main","pushedAt":"2023-05-24T23:39:45.010Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"turn off triton for now","shortMessageHtmlLink":"turn off triton for now"}},{"before":"25c9d125eccdab09f47276c038409b9c421a0c5d","after":"2ce991e5e4b4c9a44e76cf120bc746cb0848e68a","ref":"refs/heads/main","pushedAt":"2023-05-24T23:29:26.550Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"fix having one expert","shortMessageHtmlLink":"fix having one expert"}},{"before":"8805389d0da5223dd4bcb8b81e65d0764ab9e822","after":"25c9d125eccdab09f47276c038409b9c421a0c5d","ref":"refs/heads/main","pushedAt":"2023-05-24T23:07:41.272Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"basically done","shortMessageHtmlLink":"basically done"}},{"before":"43e085ba686616a02a0e29f2415e0a898730950a","after":"8805389d0da5223dd4bcb8b81e65d0764ab9e822","ref":"refs/heads/main","pushedAt":"2023-05-24T23:05:23.977Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"add transformer with mixture of autoregressive attention","shortMessageHtmlLink":"add transformer with mixture of autoregressive attention"}},{"before":"4c196e76bc3fea31c22faccfb841e9513e80987b","after":"43e085ba686616a02a0e29f2415e0a898730950a","ref":"refs/heads/main","pushedAt":"2023-05-24T23:04:24.237Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"can now train autoregressive mixture of attention with triton fused coordinate descent","shortMessageHtmlLink":"can now train autoregressive mixture of attention with triton fused c…"}},{"before":"3f4827650e15a48be29a9a47b346b69f0ff2c159","after":"4c196e76bc3fea31c22faccfb841e9513e80987b","ref":"refs/heads/main","pushedAt":"2023-05-24T21:11:48.059Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"fix variable lengthed sequences and the final scatter mean of the selected queries\n\nautoregressive mixture-of-attention complete","shortMessageHtmlLink":"fix variable lengthed sequences and the final scatter mean of the sel…"}},{"before":"ba7c2eb3cc187bb91dcaa7a07963ecc936d5bca2","after":"3f4827650e15a48be29a9a47b346b69f0ff2c159","ref":"refs/heads/main","pushedAt":"2023-05-24T21:10:17.897Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"todo","shortMessageHtmlLink":"todo"}},{"before":"ceb8abe2c2889a3f9c5bf60b9fbeec8013a266af","after":"ba7c2eb3cc187bb91dcaa7a07963ecc936d5bca2","ref":"refs/heads/main","pushedAt":"2023-05-24T21:09:41.869Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"autoregressive mixture-of-attention complete","shortMessageHtmlLink":"autoregressive mixture-of-attention complete"}},{"before":"7db9ba2eae002227f4562c795bb109f050ec2b57","after":"ceb8abe2c2889a3f9c5bf60b9fbeec8013a266af","ref":"refs/heads/main","pushedAt":"2023-05-24T21:09:10.068Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"autoregressive mixture of experts complete","shortMessageHtmlLink":"autoregressive mixture of experts complete"}},{"before":"e9ea91929cd2fc29b0635ce41a47dd1865acb50e","after":"7db9ba2eae002227f4562c795bb109f050ec2b57","ref":"refs/heads/main","pushedAt":"2023-05-24T18:08:39.966Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"fix variable lengthed sequences and the final scatter mean of the selected queries","shortMessageHtmlLink":"fix variable lengthed sequences and the final scatter mean of the sel…"}},{"before":"6238ccef0e74dbb9c4554daf18fe29a561d69fff","after":"e9ea91929cd2fc29b0635ce41a47dd1865acb50e","ref":"refs/heads/main","pushedAt":"2023-05-24T16:50:10.411Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"do all the scaling within the customized attention","shortMessageHtmlLink":"do all the scaling within the customized attention"}},{"before":"b4b3be11371052ebddfe5615a3fef214b91ea727","after":"6238ccef0e74dbb9c4554daf18fe29a561d69fff","ref":"refs/heads/main","pushedAt":"2023-05-23T20:07:52.902Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"ready enwik8 for some experiments","shortMessageHtmlLink":"ready enwik8 for some experiments"}},{"before":"020d8d05a2aca9f30c7d19db86c531320f617ebf","after":"b4b3be11371052ebddfe5615a3fef214b91ea727","ref":"refs/heads/main","pushedAt":"2023-05-22T16:58:09.139Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"update to use epsilon scaling","shortMessageHtmlLink":"update to use epsilon scaling"}},{"before":"ffadfd42672e6aec49c85d89cbf048ba326b238f","after":"020d8d05a2aca9f30c7d19db86c531320f617ebf","ref":"refs/heads/main","pushedAt":"2023-05-17T00:47:15.332Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"do not need a null routed token out if using local attention","shortMessageHtmlLink":"do not need a null routed token out if using local attention"}},{"before":"44a4df1d719eb1d53cab046755709869b6e0bac9","after":"ffadfd42672e6aec49c85d89cbf048ba326b238f","ref":"refs/heads/main","pushedAt":"2023-05-16T20:07:23.007Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"complete local attention branch for non-autoregressive","shortMessageHtmlLink":"complete local attention branch for non-autoregressive"}},{"before":"eee5e29885a25dc65712cbfd60d2355362c3ce0b","after":"44a4df1d719eb1d53cab046755709869b6e0bac9","ref":"refs/heads/main","pushedAt":"2023-05-16T18:44:00.360Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"allow for pre rmsnorm with grouping in the attention","shortMessageHtmlLink":"allow for pre rmsnorm with grouping in the attention"}},{"before":"a8cc5bd93e4645653c4a3ef5ace58b1902b7f6e1","after":"eee5e29885a25dc65712cbfd60d2355362c3ce0b","ref":"refs/heads/main","pushedAt":"2023-05-16T18:30:10.341Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"extra insurance","shortMessageHtmlLink":"extra insurance"}},{"before":"299e4c00cc0a142cf3c30a5670749e5885341a19","after":"a8cc5bd93e4645653c4a3ef5ace58b1902b7f6e1","ref":"refs/heads/main","pushedAt":"2023-05-16T18:26:06.559Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"extra insurance","shortMessageHtmlLink":"extra insurance"}},{"before":"269063d76892bfae7fa9b60ff1b4125f31fde64f","after":"299e4c00cc0a142cf3c30a5670749e5885341a19","ref":"refs/heads/main","pushedAt":"2023-05-16T18:11:34.765Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"appreciation","shortMessageHtmlLink":"appreciation"}},{"before":"cf46714b955776fb8d09ab0c64577b0bdf61085e","after":"269063d76892bfae7fa9b60ff1b4125f31fde64f","ref":"refs/heads/main","pushedAt":"2023-05-16T18:10:38.799Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"less prone to overflow","shortMessageHtmlLink":"less prone to overflow"}},{"before":"60ed2c6f50623f84c44a7cd2d29157577332641e","after":"cf46714b955776fb8d09ab0c64577b0bdf61085e","ref":"refs/heads/main","pushedAt":"2023-05-16T16:33:54.284Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"cleanup","shortMessageHtmlLink":"cleanup"}},{"before":"21f17c48bd735a66b7818fbc4693724eaa866344","after":"60ed2c6f50623f84c44a7cd2d29157577332641e","ref":"refs/heads/main","pushedAt":"2023-05-15T22:04:01.417Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"fix edge case where queries are not routed, just average across expert dimension","shortMessageHtmlLink":"fix edge case where queries are not routed, just average across exper…"}}],"hasNextPage":true,"hasPreviousPage":false,"activityType":"all","actor":null,"timePeriod":"all","sort":"DESC","perPage":30,"cursor":"djE6ks8AAAADVpYd1AA","startCursor":null,"endCursor":null}},"title":"Activity · lucidrains/mixture-of-attention"}