Update SWE-bench GitHub URL

swe-bench · Jan 22, 2025 · 20c4e14
1 parent d9f5656
commit 20c4e14
Show file tree

Hide file tree

Showing 8 changed files with 41 additions and 42 deletions.
diff --git a/index.html b/index.html
@@ -73,7 +73,7 @@ <h3 style="font-size: 20px; padding-top: 1.2em">ICLR 2024</h3>
                 <i class="fa fa-paperclip"></i> Paper&nbsp;
               </button>
             </a>
-            <a href="https://github.com/princeton-nlp/SWE-bench">
+            <a href="https://github.com/swe-bench/SWE-bench">
               <button class="outline">
                 <i class="fab fa-github"></i> Code&nbsp;
               </button>
@@ -138,7 +138,7 @@ <h2 class="text-title">News</h2>
             </p>
             <p style="margin-bottom: 0.5em">
               📣 [06/2024] We've <b>Docker</b>-ized SWE-bench for easier, containerized, reproducible evaluation.
-              [<a style="color:#0ca7ff" href="https://github.com/princeton-nlp/SWE-bench/tree/main/docs/20240627_docker">Report</a>]
+              [<a style="color:#0ca7ff" href="https://github.com/swe-bench/SWE-bench/tree/main/docs/20240627_docker">Report</a>]
             </p>
             <p style="margin-bottom: 0.5em">
               📣 [03/2024] Check out our latest work, <b>SWE-agent</b>, which achieves a 12.47% resolve rate on SWE-bench!
@@ -521,7 +521,7 @@ <h2 class="text-title">Leaderboard</h2>
                     </td>
                     <td>
                         <p style="text-align: center;">
-                            <a href="https://github.com/princeton-nlp/SWE-bench/tree/main/swebench/inference">🔗</a>
+                            <a href="https://github.com/swe-bench/SWE-bench/tree/main/swebench/inference">🔗</a>
                         </p>
                     </td>
                   </tr>
@@ -672,7 +672,7 @@ <h2 class="text-title">Leaderboard</h2>
                   </tr>
                 </thead>
                 <tbody>
-                  <tr style="background-color: #82CEFA;">
+                  <tr>
                     <td>
                       <p class="model-type">
 🆕 🥇 
@@ -700,7 +700,7 @@ <h2 class="text-title">Leaderboard</h2>
                         </p>
                     </td>
                   </tr>
-                  <tr style="background-color: #82CEFA;">
+                  <tr>
                     <td>
                       <p class="model-type">
 🆕 🥈 
@@ -754,7 +754,7 @@ <h2 class="text-title">Leaderboard</h2>
                         </p>
                     </td>
                   </tr>
-                  <tr style="background-color: #82CEFA;">
+                  <tr>
                     <td>
                       <p class="model-type">
 🆕                         Learn-by-interact
@@ -943,7 +943,7 @@ <h2 class="text-title">Leaderboard</h2>
                         </p>
                     </td>
                   </tr>
-                  <tr style="background-color: #82CEFA;">
+                  <tr>
                     <td>
                       <p class="model-type">
 🆕                         Bracket.sh
@@ -1348,7 +1348,7 @@ <h2 class="text-title">Leaderboard</h2>
                         </p>
                     </td>
                   </tr>
-                  <tr style="background-color: #82CEFA;">
+                  <tr>
                     <td>
                       <p class="model-type">
 🆕                         CodeShellAgent + Gemini 2.0 Flash (Experimental)
@@ -1402,7 +1402,7 @@ <h2 class="text-title">Leaderboard</h2>
                         </p>
                     </td>
                   </tr>
-                  <tr style="background-color: #82CEFA;">
+                  <tr>
                     <td>
                       <p class="model-type">
 🆕                         ugaiforge
@@ -2149,7 +2149,7 @@ <h2 class="text-title">Leaderboard</h2>
                     </td>
                     <td>
                         <p style="text-align: center;">
-                            <a href="https://github.com/princeton-nlp/SWE-bench/tree/main/swebench/inference">🔗</a>
+                            <a href="https://github.com/swe-bench/SWE-bench/tree/main/swebench/inference">🔗</a>
                         </p>
                     </td>
                   </tr>
@@ -2300,7 +2300,7 @@ <h2 class="text-title">Leaderboard</h2>
                   </tr>
                 </thead>
                 <tbody>
-                  <tr style="background-color: #82CEFA;">
+                  <tr>
                     <td>
                       <p class="model-type">
 🆕 🥇 
@@ -2571,7 +2571,7 @@ <h2 class="text-title">Leaderboard</h2>
                         </p>
                     </td>
                   </tr>
-                  <tr style="background-color: #82CEFA;">
+                  <tr>
                     <td>
                       <p class="model-type">
 🆕                         OpenCSG Starship Agentic Coder + GPT 4 (0806)
@@ -2624,7 +2624,7 @@ <h2 class="text-title">Leaderboard</h2>
                         </p>
                     </td>
                   </tr>
-                  <tr style="background-color: #82CEFA;">
+                  <tr>
                     <td>
                       <p class="model-type">
 🆕 🤠 ✅                         Moatless Tools + Claude 3.5 Sonnet (20241022)
@@ -2703,7 +2703,7 @@ <h2 class="text-title">Leaderboard</h2>
                         </p>
                     </td>
                   </tr>
-                  <tr style="background-color: #82CEFA;">
+                  <tr>
                     <td>
                       <p class="model-type">
 🆕 🤠                         Patched.Codes Patchwork
@@ -2757,7 +2757,7 @@ <h2 class="text-title">Leaderboard</h2>
                         </p>
                     </td>
                   </tr>
-                  <tr style="background-color: #82CEFA;">
+                  <tr>
                     <td>
                       <p class="model-type">
 🆕                         CodeFuse-AAIS
@@ -3686,7 +3686,7 @@ <h2 class="text-title">Leaderboard</h2>
                     </td>
                     <td>
                         <p style="text-align: center;">
-                            <a href="https://github.com/princeton-nlp/SWE-bench/tree/main/swebench/inference">🔗</a>
+                            <a href="https://github.com/swe-bench/SWE-bench/tree/main/swebench/inference">🔗</a>
                         </p>
                     </td>
                   </tr>
@@ -3820,7 +3820,7 @@ <h2 class="text-title">Leaderboard</h2>
  -                         </p>
                     </td>
                   </tr>
-                  <tr style="background-color: #82CEFA;">
+                  <tr>
                     <td>
                       <p class="model-type">
 🆕 🤠 ✅                         Moatless Tools + Deepseek V3
@@ -4078,7 +4078,7 @@ <h2 class="text-title">Leaderboard</h2>
                     </td>
                     <td>
                         <p style="text-align: center;">
-                            <a href="https://github.com/princeton-nlp/SWE-bench">🔗</a>
+                            <a href="https://github.com/swe-bench/SWE-bench">🔗</a>
                         </p>
                     </td>
                   </tr>
@@ -4105,7 +4105,7 @@ <h2 class="text-title">Leaderboard</h2>
                     </td>
                     <td>
                         <p style="text-align: center;">
-                            <a href="https://github.com/princeton-nlp/SWE-bench">🔗</a>
+                            <a href="https://github.com/swe-bench/SWE-bench">🔗</a>
                         </p>
                     </td>
                   </tr>
@@ -4157,10 +4157,11 @@ <h2 class="text-title">Leaderboard</h2>
               <br>
               - <span style="color:#0ea7ff;"><b>🤠 Open</b></span> refers to submissions that have open-source code. This does <i>not</i>
               necessarily mean the underlying model is open-source.
-              <br><br>
-              - If you would like to submit your model to the leaderboard, please check the <a href="submit.html">submission</a> page.
               <br>
-              - All submissions are Pass@1, do not use
+              - <span style="color:#0ea7ff;"><b>🆕 New</b></span> refers to the most recently submitted solutions.
+              <br><br>
+              If you'd like to submit to the leaderboard, please check <a href="submit.html">this</a> page.
+              All submissions are Pass@1, do not use
               <code style="color:black;background-color:#ddd;border-radius: 0.25em">hints_text</code>,
               and are in the unassisted setting.
             </p>

diff --git a/lite.html b/lite.html
@@ -79,7 +79,7 @@ <h3>A Canonical Subset for Efficient Evaluation of Language Models as Software E
               Since its release, we've found that for most systems evaluating on SWE-bench, running each instance can take a lot of time and compute. We've also found that SWE-bench can be a particularly difficult benchmark, which is useful for evaluating LMs in the long term, but discouraging for systems trying to make progress in the short term.
               <br/>
               <br/>
-              To remedy these issues, we've released a canonical subset of SWE-bench called SWE-bench Lite. SWE-bench Lite comprises 300 instances from SWE-bench that have been sampled to be more self-contained, with a focus on evaluating functional bug fixes. SWE-bench Lite covers 11 of the original 12 repositories in SWE-bench, with a similar diversity and distribution of repositories as the original. We perform similar filtering on the SWE-bench dev set to provide 23 development instances that can be useful for active development on the SWE-bench task. We recommend future systems evaluating on SWE-bench to report numbers on SWE-bench Lite in lieu of the full SWE-bench set if necessary. You can find the source code for how SWE-bench Lite was created in <a href="https://github.com/princeton-nlp/SWE-bench/tree/main/swebench/collect/make_lite">SWE-bench/swebench/collect/make_lite</a>.
+              To remedy these issues, we've released a canonical subset of SWE-bench called SWE-bench Lite. SWE-bench Lite comprises 300 instances from SWE-bench that have been sampled to be more self-contained, with a focus on evaluating functional bug fixes. SWE-bench Lite covers 11 of the original 12 repositories in SWE-bench, with a similar diversity and distribution of repositories as the original. We perform similar filtering on the SWE-bench dev set to provide 23 development instances that can be useful for active development on the SWE-bench task. We recommend future systems evaluating on SWE-bench to report numbers on SWE-bench Lite in lieu of the full SWE-bench set if necessary. You can find the source code for how SWE-bench Lite was created in <a href="https://github.com/swe-bench/SWE-bench/tree/main/swebench/collect/make_lite">SWE-bench/swebench/collect/make_lite</a>.
               <br/>
               <br/>
               Here's a list of the general criteria we used to select SWE-bench Lite instances:

diff --git a/multimodal.html b/multimodal.html
@@ -69,6 +69,7 @@ <h1 style="font-size: 60px; padding-top: 0.4em; color: #2F4F4F;">SWE-bench Multi
             />
           </div>
           <h3 style="color: #2F4F4F;">Do AI Systems Generalize to Visual Software Domains?</h3>
+          <h3 style="font-size: 20px; padding-top: 1.2em">ICLR 2025</h3>
           <p style="text-align: center;margin-top:1em; color: #2F4F4F;">
             John Yang*, Carlos E. Jimenez*,<br />
             Alex L. Zhang, Kilian Lieret, Joyce Yang, Xindi Wu, Ori Press, Niklas Muennighoff,<br />

diff --git a/submit.html b/submit.html
@@ -73,7 +73,7 @@ <h1 style="font-size: 60px; padding-top: 0.4em">Submit to SWE-bench</h1>
                         <i class="fa fa-paperclip"></i> Paper&nbsp;
                     </button>
                 </a>
-                <a href="https://github.com/princeton-nlp/SWE-bench">
+                <a href="https://github.com/swe-bench/SWE-bench">
                     <button class="outline">
                         <i class="fab fa-github"></i> Code&nbsp;
                     </button>
@@ -143,7 +143,7 @@ <h3>
                     <li>Create a pull request to the SWE-bench/experiments repository with the new folder.</li>
                 </ol>
                 <p>
-                    You can refer to this <a href="https://github.com/princeton-nlp/SWE-bench/blob/main/tutorials/evaluation.md">tutorial</a> for a quick overview of how to evaluate your model on SWE-bench.
+                    You can refer to this <a href="https://github.com/swe-bench/SWE-bench/blob/main/tutorials/evaluation.md">tutorial</a> for a quick overview of how to evaluate your model on SWE-bench.
                 </p>        
             </div>
         </div>
@@ -156,7 +156,7 @@ <h3>
                     Please note that we consider an eligible submission to the SWE-bench [Lite] leaderboard to satisfy these criteria:
                 </p>
                 <ol>
-                    <li>The use of the <code>hints_text</code> field is <i>not</i> allowed. See our explanation <a href="https://github.com/princeton-nlp/SWE-bench/issues/133">here</a>.</li>
+                    <li>The use of the <code>hints_text</code> field is <i>not</i> allowed. See our explanation <a href="https://github.com/swe-bench/SWE-bench/issues/133">here</a>.</li>
                     <li>The result should be pass@1. There should be one execution log per task instance for all 2294 task instances.</li>
                     <li>The result should <i>not</i> be in the "Oracle" retrieval setting. The agent cannot be told the correct files to edit, where "correct" refers to the files modified by the reference solution patch.</li>
                 </ol>

diff --git a/template/data.json b/template/data.json
@@ -166,7 +166,7 @@
           "date": "2024-04-02",
           "logs": true,
           "trajs": false,
-          "site": "https://github.com/princeton-nlp/SWE-bench/tree/main/swebench/inference",
+          "site": "https://github.com/swe-bench/SWE-bench/tree/main/swebench/inference",
           "verified": true,
           "oss": true,
           "org_logo": "https://avatars.githubusercontent.com/u/139597579?s=200&v=4",
@@ -964,7 +964,7 @@
           "date": "2024-04-02",
           "logs": true,
           "trajs": false,
-          "site": "https://github.com/princeton-nlp/SWE-bench/tree/main/swebench/inference",
+          "site": "https://github.com/swe-bench/SWE-bench/tree/main/swebench/inference",
           "verified": true,
           "oss": true,
           "org_logo": "https://avatars.githubusercontent.com/u/139597579?s=200&v=4",
@@ -1762,7 +1762,7 @@
           "date": "2024-04-02",
           "logs": true,
           "trajs": false,
-          "site": "https://github.com/princeton-nlp/SWE-bench/tree/main/swebench/inference",
+          "site": "https://github.com/swe-bench/SWE-bench/tree/main/swebench/inference",
           "verified": true,
           "oss": true,
           "org_logo": "https://avatars.githubusercontent.com/u/139597579?s=200&v=4",
@@ -1949,7 +1949,7 @@
           "date": "2024-10-06",
           "logs": false,
           "trajs": false,
-          "site": "https://github.com/princeton-nlp/SWE-bench",
+          "site": "https://github.com/swe-bench/SWE-bench",
           "verified": true,
           "oss": true,
           "org_logo": "https://avatars.githubusercontent.com/u/139597579?s=200&v=4",
@@ -1962,7 +1962,7 @@
           "date": "2024-10-06",
           "logs": false,
           "trajs": false,
-          "site": "https://github.com/princeton-nlp/SWE-bench",
+          "site": "https://github.com/swe-bench/SWE-bench",
           "verified": true,
           "oss": true,
           "org_logo": "https://avatars.githubusercontent.com/u/139597579?s=200&v=4",

diff --git a/template/template_index.html b/template/template_index.html
@@ -73,7 +73,7 @@ <h3 style="font-size: 20px; padding-top: 1.2em">ICLR 2024</h3>
                 <i class="fa fa-paperclip"></i> Paper&nbsp;
               </button>
             </a>
-            <a href="https://github.com/princeton-nlp/SWE-bench">
+            <a href="https://github.com/swe-bench/SWE-bench">
               <button class="outline">
                 <i class="fab fa-github"></i> Code&nbsp;
               </button>
@@ -138,7 +138,7 @@ <h2 class="text-title">News</h2>
             </p>
             <p style="margin-bottom: 0.5em">
               📣 [06/2024] We've <b>Docker</b>-ized SWE-bench for easier, containerized, reproducible evaluation.
-              [<a style="color:#0ca7ff" href="https://github.com/princeton-nlp/SWE-bench/tree/main/docs/20240627_docker">Report</a>]
+              [<a style="color:#0ca7ff" href="https://github.com/swe-bench/SWE-bench/tree/main/docs/20240627_docker">Report</a>]
             </p>
             <p style="margin-bottom: 0.5em">
               📣 [03/2024] Check out our latest work, <b>SWE-agent</b>, which achieves a 12.47% resolve rate on SWE-bench!
@@ -175,11 +175,7 @@ <h2 class="text-title">Leaderboard</h2>
                 </thead>
                 <tbody>
                   {% for item in leaderboard.results if not item.warning %}
-                  {% if item.date >= "2025-01-01" %}
-                  <tr style="background-color: #82CEFA;">
-                  {% else %}
                   <tr>
-                  {% endif %}
                     <td>
                       <p class="model-type">
                         {% if item.date >= "2025-01-01" %}🆕 {% endif %}
@@ -242,10 +238,11 @@ <h2 class="text-title">Leaderboard</h2>
               <br>
               - <span style="color:#0ea7ff;"><b>🤠 Open</b></span> refers to submissions that have open-source code. This does <i>not</i>
               necessarily mean the underlying model is open-source.
-              <br><br>
-              - If you would like to submit your model to the leaderboard, please check the <a href="submit.html">submission</a> page.
               <br>
-              - All submissions are Pass@1, do not use
+              - <span style="color:#0ea7ff;"><b>🆕 New</b></span> refers to the most recently submitted solutions.
+              <br><br>
+              If you'd like to submit to the leaderboard, please check <a href="submit.html">this</a> page.
+              All submissions are Pass@1, do not use
               <code style="color:black;background-color:#ddd;border-radius: 0.25em">hints_text</code>,
               and are in the unassisted setting.
             </p>

diff --git a/template/template_viewer.html b/template/template_viewer.html
@@ -63,7 +63,7 @@ <h1 style="font-size: 60px; padding-top: 0.4em">SWE-bench Analysis</h1>
                         <i class="fa fa-paperclip"></i> Paper&nbsp;
                     </button>
                 </a>
-                <a href="https://github.com/princeton-nlp/SWE-bench">
+                <a href="https://github.com/swe-bench/SWE-bench">
                     <button class="outline">
                         <i class="fab fa-github"></i> Code&nbsp;
                     </button>

diff --git a/viewer.html b/viewer.html
@@ -63,7 +63,7 @@ <h1 style="font-size: 60px; padding-top: 0.4em">SWE-bench Analysis</h1>
                         <i class="fa fa-paperclip"></i> Paper&nbsp;
                     </button>
                 </a>
-                <a href="https://github.com/princeton-nlp/SWE-bench">
+                <a href="https://github.com/swe-bench/SWE-bench">
                     <button class="outline">
                         <i class="fab fa-github"></i> Code&nbsp;
                     </button>