First pass to optimize and improve

cli · BagToad · Jul 21, 2025 · Jul 16, 2025 · Jul 16, 2025 · Jul 16, 2025
commit 8610d8ba8acf261143029969a9001b1d1396a29b
@@ -0,0 +1,7 @@
+name: Detect spam
+model: openai/gpt-4o-mini
+messages:
+  - role: system
+    content: "" # Since it's not a fix value, it should be generated and replaced at runtime
+  - role: user
+    content: "" # This will be replaced at runtime
@@ -9,10 +9,11 @@
 
 set -euo pipefail
 
-_prompt_file=".github/workflows/scripts/spam-detection/prompt.yml"
-_generate_sys_prompt_script=".github/workflows/scripts/spam-detection/generate-sys-prompt.sh"
-_generate_prompt_script=".github/workflows/scripts/spam-detection/generate-prompt.sh"
+# Determine absolute path to script directory based on where it is called from.
+# This allows the script to be run from any directory.
+SPAM_DIR="$(dirname "$(realpath "$0")")"
 
+# Retrieve and prepare information about issue for detection
 _issue_url="$1"
 if [[ -z "$_issue_url" ]]; then
     echo "error: issue URL is empty" >&2
@@ -21,21 +22,23 @@ fi
 
 _issue="$(gh issue view --json title,body "$_issue_url")"
 
-_issue_body="$(jq -r ".body" <<< "$_issue")"
-_issue_title="$(jq -r ".title" <<< "$_issue")"
+cat << EOF | _user_prompt
+<TITLE>
+$(jq -r ".title" <<< "$_issue")
+</TITLE>
 
-_system_prompt="$($_generate_sys_prompt_script)"
-_input_prompt="$($_generate_prompt_script "$_issue_title" "$_issue_body")"
+<BODY>
+$(jq -r ".body" <<< "$_issue")
+</BODY>
+EOF
 
-_updated_prompt_file_content="$(
-    cat "$_prompt_file" |
-    yq eval 'del(.testData, .evaluators)' | # drop test data
-    _system="$_system_prompt" _input="$_input_prompt" yq eval ".messages[0].content = strenv(_system) | .messages[1].content = strenv(_input)"
-)"
+# Generate dynamic prompts for inference
+_system_prompt="$($SPAM_DIR/generate-sys-prompt.sh)"
+_final_prompt="$(_system="$_system_prompt" _user="$_user_prompt" yq eval ".messages[0].content = strenv(_system) | .messages[1].content = strenv(_user)" "$SPAM_DIR/check-issue-prompts.yml")"
 
 gh extension install github/gh-models 2>/dev/null
 
-_result="$(gh models run --file <(echo "$_updated_prompt_file_content") | cat)"
+_result="$(gh models run --file <(echo "$_final_prompt") | cat)"
 
 if [[ "$_result" != "PASS" && "$_result" != "FAIL" ]]; then
     echo "error: expected PASS or FAIL but got an unexpected result: $_result" >&2

@@ -4,7 +4,7 @@ messages:
   - role: system
     content: "" # Since it's not a fix value, it should be generated and replaced at runtime
   - role: user
-    content: "{{input}}"
+    content: "{{input}}" # This will be replaced by `gh models eval` from `testData` below
 evaluators:
   - name: assert response
     string:
@@ -27,11 +27,11 @@ testData:
       > ```yaml
       > name: Dependabot fetch metadata
       > on: pull_request
-      > 
+      >
       > permissions:
       >   pull-requests: write
       >   issues: write
-      > 
+      >
       > jobs:
       >   dependabot:
       >     runs-on: ubuntu-latest
@@ -160,7 +160,7 @@ testData:
       <BODY>
       ### Describe the bug
 
-      A clear and concise description of what the bug is. 
+      A clear and concise description of what the bug is.
 
       ### Affected version
 
@@ -193,7 +193,7 @@ testData:
       <BODY>
       ### Describe the bug
 
-      A clear and concise description of what the bug is. 
+      A clear and concise description of what the bug is.
 
       ### Affected version
 
@@ -224,10 +224,10 @@ testData:
       </TITLE>
 
       <BODY>
-      > Thanks for submitting this, @brettdh! 🙏 
-      > 
+      > Thanks for submitting this, @brettdh! 🙏
+      >
       > Currently, this is similar to the GraphQL API request that `gh` uses to fetch the list of releases:
-      > 
+      >
       > ```gql
       > query {
       >   repository(owner:"cli", name:"cli") {
@@ -246,12 +246,12 @@ testData:
       >   }
       > }
       > ```
-      > 
+      >
       > You can simply try it on [GraphQL Explorer](https://docs.github.com/en/graphql/overview/explorer).
-      > 
+      >
       > There's a bit of terminology difference here, but I think what you're referring to as *notes* is actually the `description` field on a `Release` type. Can you please confirm that's what you're asking for? You can try adding `description` to the list of fields in the above query and see if that fits your purpose.
-      > 
-      >  
+      >
+      >
 
       _Originally posted by @babakks in [#11241](https://github.com/cli/cli/issues/11241#issuecomment-3049268353)_
       </BODY>
@@ -263,7 +263,7 @@ testData:
       </TITLE>
 
       <BODY>
-      ###I believe there might be a bug or just looking for work jobid 
+      ###I believe there might be a bug or just looking for work jobid
 
       Maybe theirs problem in the with log fails
 

@@ -6,21 +6,23 @@
 
 set -euo pipefail
 
-_prompt_file=".github/workflows/scripts/spam-detection/prompt.yml"
-_generate_sys_prompt_script=".github/workflows/scripts/spam-detection/generate-sys-prompt.sh"
+# Determine absolute path to script directory based on where it is called from.
+# This allows the script to be run from any directory.
+SPAM_DIR="$(dirname "$(realpath "$0")")"
 
-_system_prompt="$($_generate_sys_prompt_script)"
-_updated_prompt_file="$(_value="$_system_prompt" yq eval '.messages[0].content = strenv(_value)' "$_prompt_file")"
+# Generate dynamic prompts for inference
+_system_prompt="$($SPAM_DIR/generate-sys-prompt.sh)"
+_final_prompt="$(_value="$_system_prompt" yq eval '.messages[0].content = strenv(_value)' $SPAM_DIR/eval-prompts.yml)"
 
 # We should be able to just run the following command:
 #
 # ```
-# gh models eval <(echo "$_updated_prompt_file")
+# gh models eval <(echo "$_final_prompt")
 # ```
 #
 # But since `gh-models` does not throttle the rate of API requests, we need to
 # modify the extension code and introduce a deliberate delay between the runs.
 # Here, we assume a binary of the `gh-models` extension (with appropriate
 # throttling) is available in the root directory of the repository and we're
 # calling it directly (not though `gh`).
-./gh-models eval <(echo "$_updated_prompt_file")
+gh models eval <(echo "$_final_prompt")