{"id":491,"date":"2025-01-16T08:31:07","date_gmt":"2025-01-16T00:31:07","guid":{"rendered":"https:\/\/aitimes.link\/?p=491"},"modified":"2025-01-16T08:32:40","modified_gmt":"2025-01-16T00:32:40","slug":"prime%ef%bc%88process-reinforcement-through-implicit-rewards%ef%bc%89","status":"publish","type":"post","link":"https:\/\/aitimes.link\/index.php\/2025\/01\/16\/prime%ef%bc%88process-reinforcement-through-implicit-rewards%ef%bc%89\/","title":{"rendered":"PRIME"},"content":{"rendered":"\n<p>\u8fd1\u65e5\uff0c\u6e05\u534e\u5927\u5b66 NLP \u5b9e\u9a8c\u5ba4\u8054\u5408\u4e0a\u6d77 AI Lab\uff0c\u6e05\u534e\u5927\u5b66\u7535\u5b50\u7cfb\u53ca OpenBMB \u793e\u533a\u63d0\u51fa\u4e00\u79cd\u65b0\u7684\u7ed3\u5408\u8fc7\u7a0b\u5956\u52b1\u7684\u5f3a\u5316\u5b66\u4e60\u65b9\u6cd5\u2014\u2014 PRIME\uff08Process Reinforcement through IMplicit REwards\uff09\uff0c\u91c7\u7528 PRIME \u65b9\u6cd5\uff0c\u7814\u7a76\u4eba\u5458\u4e0d\u4f9d\u8d56\u4efb\u4f55\u84b8\u998f\u6570\u636e\u548c\u6a21\u4eff\u5b66\u4e60\uff0c\u4ec5\u7528 8 \u5f20 A100\uff0c\u82b1\u8d39\u4e00\u4e07\u5757\u94b1\u5de6\u53f3\uff0c\u4e0d\u5230 10 \u5929\u65f6\u95f4\uff0c\u5c31\u80fd\u9ad8\u6548\u8bad\u7ec3\u51fa\u4e00\u4e2a\u6570\u5b66\u80fd\u529b\u8d85\u8fc7 GPT-4o\u3001Llama-3.1-70B \u7684 7B \u6a21\u578b Eurus-2-7B-PRIME\u3002<\/p>\n\n\n\n<p>\u5177\u4f53\u800c\u8a00\uff0c\u7814\u7a76\u4eba\u5458\u5229\u7528 Qwen2.5-Math-7B-Base \u4f5c\u4e3a\u57fa\u5ea7\u6a21\u578b\uff0c\u8bad\u7ec3\u51fa\u4e86\u65b0\u6a21\u578b Eurus-2-7B-PRIME \uff0c\u5e76\u5728\u7f8e\u56fd IMO \u9009\u62d4\u8003\u8bd5 AIME 2024 \u4e0a\u7684\u51c6\u786e\u7387\u8fbe\u5230 26.7%\uff0c\u5927\u5e45\u8d85\u8d8a GPT-4o\uff0cLlama3.1-70B \u548c Qwen2.5-Math-7B-Instruct\uff0c\u4e14\u4ec5\u4f7f\u7528\u4e86 Qwen Math \u6570\u636e\u7684 1\/10\u3002\u5176\u4e2d\uff0c\u5f3a\u5316\u5b66\u4e60\u65b9\u6cd5 PRIME \u4e3a\u6a21\u578b\u5e26\u6765\u4e86 16.7% \u7684\u7edd\u5bf9\u63d0\u5347\uff0c\u8fdc\u8d85\u5df2\u77e5\u7684\u4efb\u4f55\u5f00\u6e90\u65b9\u6848\u3002<\/p>\n\n\n\n<p><a href=\"https:\/\/www.infoq.cn\/article\/9WdYUx1JVh0xZ0GfQcVx\">https:\/\/www.infoq.cn\/article\/9WdYUx1JVh0xZ0GfQcVx<\/a><\/p>\n\n\n\n<figure class=\"wp-block-image size-large\"><img loading=\"lazy\" decoding=\"async\" width=\"1024\" height=\"576\" src=\"https:\/\/aitimes.link\/wp-content\/uploads\/2025\/01\/image-27-1024x576.png\" alt=\"\" class=\"wp-image-492\" srcset=\"https:\/\/aitimes.link\/wp-content\/uploads\/2025\/01\/image-27-1024x576.png 1024w, https:\/\/aitimes.link\/wp-content\/uploads\/2025\/01\/image-27-300x169.png 300w, https:\/\/aitimes.link\/wp-content\/uploads\/2025\/01\/image-27-768x432.png 768w, https:\/\/aitimes.link\/wp-content\/uploads\/2025\/01\/image-27.png 1080w\" sizes=\"auto, (max-width: 1024px) 100vw, 1024px\" \/><\/figure>\n\n\n\n<figure class=\"wp-block-image size-large\"><img loading=\"lazy\" decoding=\"async\" width=\"1024\" height=\"841\" src=\"https:\/\/aitimes.link\/wp-content\/uploads\/2025\/01\/image-28-1024x841.png\" alt=\"\" class=\"wp-image-493\" srcset=\"https:\/\/aitimes.link\/wp-content\/uploads\/2025\/01\/image-28-1024x841.png 1024w, https:\/\/aitimes.link\/wp-content\/uploads\/2025\/01\/image-28-300x246.png 300w, https:\/\/aitimes.link\/wp-content\/uploads\/2025\/01\/image-28-768x631.png 768w, https:\/\/aitimes.link\/wp-content\/uploads\/2025\/01\/image-28.png 1376w\" sizes=\"auto, (max-width: 1024px) 100vw, 1024px\" \/><\/figure>\n\n\n\n<figure class=\"wp-block-image size-large\"><img loading=\"lazy\" decoding=\"async\" width=\"1024\" height=\"716\" src=\"https:\/\/aitimes.link\/wp-content\/uploads\/2025\/01\/image-29-1024x716.png\" alt=\"\" class=\"wp-image-494\" srcset=\"https:\/\/aitimes.link\/wp-content\/uploads\/2025\/01\/image-29-1024x716.png 1024w, https:\/\/aitimes.link\/wp-content\/uploads\/2025\/01\/image-29-300x210.png 300w, https:\/\/aitimes.link\/wp-content\/uploads\/2025\/01\/image-29-768x537.png 768w, https:\/\/aitimes.link\/wp-content\/uploads\/2025\/01\/image-29.png 1350w\" sizes=\"auto, (max-width: 1024px) 100vw, 1024px\" \/><\/figure>\n","protected":false},"excerpt":{"rendered":"<p>\u8fd1\u65e5\uff0c\u6e05\u534e\u5927\u5b66 NLP \u5b9e\u9a8c\u5ba4\u8054\u5408\u4e0a\u6d77 AI Lab\uff0c\u6e05\u534e\u5927\u5b66\u7535\u5b50\u7cfb\u53ca OpenBMB \u793e\u533a\u63d0\u51fa\u4e00\u79cd\u65b0\u7684\u7ed3\u5408\u8fc7 [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[6],"tags":[],"class_list":["post-491","post","type-post","status-publish","format-standard","hentry","category-ai"],"_links":{"self":[{"href":"https:\/\/aitimes.link\/index.php\/wp-json\/wp\/v2\/posts\/491","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/aitimes.link\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/aitimes.link\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/aitimes.link\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/aitimes.link\/index.php\/wp-json\/wp\/v2\/comments?post=491"}],"version-history":[{"count":2,"href":"https:\/\/aitimes.link\/index.php\/wp-json\/wp\/v2\/posts\/491\/revisions"}],"predecessor-version":[{"id":496,"href":"https:\/\/aitimes.link\/index.php\/wp-json\/wp\/v2\/posts\/491\/revisions\/496"}],"wp:attachment":[{"href":"https:\/\/aitimes.link\/index.php\/wp-json\/wp\/v2\/media?parent=491"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/aitimes.link\/index.php\/wp-json\/wp\/v2\/categories?post=491"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/aitimes.link\/index.php\/wp-json\/wp\/v2\/tags?post=491"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}