% git-subtree-dir: vendor/ruvector
% git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
% Bibliography for Mincut-Gated Transformer
|
|
|
|
@article{raposo2024mixture,
|
|
title={Mixture-of-Depths: Dynamically allocating compute in transformer-based language models},
|
|
author={Raposo, David and Ritter, Sam and Richards, Blake A and Lillicrap, Timothy P and Humphreys, Peter Conway and Santoro, Adam},
|
|
journal={arXiv preprint arXiv:2404.02258},
|
|
year={2024}
|
|
}
|
|
|
|
@article{elhoushi2024layerskip,
|
|
title={LayerSkip: Enabling Early Exit Inference and Self-Speculative Decoding},
|
|
author={Elhoushi, Mostafa and Diana, Akshat and Xu, Zhongwei and Choi, Yuxiong and Zhang, Yuchen and Keutzer, Kurt},
|
|
journal={arXiv preprint arXiv:2404.16710},
|
|
year={2024}
|
|
}
|
|
|
|
@inproceedings{jiang2024minference,
|
|
title={MInference 1.0: Accelerating Pre-filling for Long-Context LLMs via Dynamic Sparse Attention},
|
|
author={Jiang, Huiqiang and Wu, Qianhui and Zheng, Haoyang and Li, Yue and Yang, Hongsheng},
|
|
booktitle={Advances in Neural Information Processing Systems},
|
|
volume={37},
|
|
year={2024}
|
|
}
|
|
|
|
@article{gladstone2025energy,
|
|
title={Energy-Based Transformers are Scalable Learners and Thinkers},
|
|
author={Gladstone, Aram and Shankar, Shishir and Belanger, David and Likhomanenko, Tatiana and Faust, Aleksandra},
|
|
journal={arXiv preprint arXiv:2507.02092},
|
|
year={2025}
|
|
}
|
|
|
|
@inproceedings{yao2023spike,
|
|
title={Spike-driven Transformer},
|
|
author={Yao, Man and Zhao, Guangshe and Zhang, Hengyu and Hu, Yifan and Deng, Lei and Tian, Yonghong and Xu, Bo and Li, Guoqi},
|
|
booktitle={Advances in Neural Information Processing Systems},
|
|
volume={36},
|
|
pages={56--78},
|
|
year={2023}
|
|
}
|
|
|
|
@inproceedings{yao2024spike2,
|
|
title={Spike-driven Transformer V2: Meta Spiking Neural Network Architecture Inspiring Integrated Artificial Intelligence},
|
|
author={Yao, Man and Zhang, Hengyu and Zhao, Guangshe and Wang, Jiechen and Hu, Yifan and Deng, Lei and Li, Guoqi},
|
|
booktitle={International Conference on Learning Representations},
|
|
year={2024}
|
|
}
|
|
|
|
@inproceedings{kreuzer2021spectral,
|
|
title={Rethinking Graph Transformers with Spectral Attention},
|
|
author={Kreuzer, Devin and Beaini, Dominique and Hamilton, Will and L{\'e}tourneau, Vincent and Tossou, Prudencio},
|
|
booktitle={Advances in Neural Information Processing Systems},
|
|
volume={34},
|
|
pages={21618--21629},
|
|
year={2021}
|
|
}
|
|
|
|
@article{kernighan1970efficient,
|
|
title={An efficient heuristic procedure for partitioning graphs},
|
|
author={Kernighan, Brian W and Lin, Shen},
|
|
journal={Bell System Technical Journal},
|
|
volume={49},
|
|
number={2},
|
|
pages={291--307},
|
|
year={1970},
|
|
publisher={Wiley Online Library}
|
|
}
|
|
|
|
@article{blondel2008fast,
|
|
title={Fast unfolding of communities in large networks},
|
|
author={Blondel, Vincent D and Guillaume, Jean-Loup and Lambiotte, Renaud and Lefebvre, Etienne},
|
|
journal={Journal of Statistical Mechanics: Theory and Experiment},
|
|
volume={2008},
|
|
number={10},
|
|
pages={P10008},
|
|
year={2008},
|
|
publisher={IOP Publishing}
|
|
}
|
|
|
|
@inproceedings{vaswani2017attention,
|
|
title={Attention is all you need},
|
|
author={Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and Kaiser, {\L}ukasz and Polosukhin, Illia},
|
|
booktitle={Advances in Neural Information Processing Systems},
|
|
volume={30},
|
|
year={2017}
|
|
}
|