Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
#TESTAR v2.8.5 (17-Mar-2026)
- Bump io.appium:java-client from 10.0.0 to 10.1.0
- Update JS getIsBlockedTestar logic
- Add continue, completed, and invalid LLM Oracle logic
- Add LlmTestGoalOrchestrator logic
- Add WebIsDisplayed to StateCondition
- Update selectAction in model evaluator protocols to prevent the LLM from selecting extra actions


#TESTAR v2.8.4 (12-Mar-2026)
- Remove abstract actionIds from LLM ActionHistory
- Update getElementDescription priority and length
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.8.4
2.8.5
2 changes: 1 addition & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ subprojects {
// https://mvnrepository.com/artifact/io.github.bonigarcia/webdrivermanager
implementation group: 'io.github.bonigarcia', name: 'webdrivermanager', version: '6.3.3'
// https://mvnrepository.com/artifact/io.appium/java-client
implementation group: 'io.appium', name: 'java-client', version: '10.0.0'
implementation group: 'io.appium', name: 'java-client', version: '10.1.0'
// https://mvnrepository.com/artifact/com.orientechnologies/orientdb-graphdb
implementation group: 'com.orientechnologies', name: 'orientdb-graphdb', version: '3.2.38'
// https://mvnrepository.com/artifact/org.graalvm.sdk/graal-sdk
Expand Down
6 changes: 5 additions & 1 deletion core/src/org/testar/StateManagementTags.java
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,8 @@ public enum Group {General, ControlPattern, WebDriver}
public static final Tag<String> WebWidgetId = Tag.from("Web Widget id", String.class);
public static final Tag<String> WebWidgetName = Tag.from("Web Widget name", String.class);
public static final Tag<String> WebWidgetTagName = Tag.from("Web Widget tag name", String.class);
public static final Tag<String> WebWidgetTextContent= Tag.from("Web Widget text content", String.class);
public static final Tag<String> WebWidgetTextContent = Tag.from("Web Widget text content", String.class);
public static final Tag<String> WebWidgetInnerText = Tag.from("Web Widget inner text", String.class);
public static final Tag<String> WebWidgetTitle = Tag.from("Web Widget title", String.class);
public static final Tag<String> WebWidgetHref = Tag.from("Web Widget href", String.class);
public static final Tag<String> WebWidgetValue = Tag.from("Web Widget value", String.class);
Expand Down Expand Up @@ -402,6 +403,7 @@ public enum Group {General, ControlPattern, WebDriver}
add(WebWidgetName);
add(WebWidgetTagName);
add(WebWidgetTextContent);
add(WebWidgetInnerText);
add(WebWidgetTitle);
add(WebWidgetHref);
add(WebWidgetValue);
Expand Down Expand Up @@ -501,6 +503,7 @@ public static boolean isStateManagementTag(Tag<?> tag) {
settingsMap.put(WebWidgetName, "WebWidgetName");
settingsMap.put(WebWidgetTagName, "WebWidgetTagName");
settingsMap.put(WebWidgetTextContent, "WebWidgetTextContent");
settingsMap.put(WebWidgetInnerText, "WebWidgetInnerText");
settingsMap.put(WebWidgetTitle, "WebWidgetTitle");
settingsMap.put(WebWidgetHref, "WebWidgetHref");
settingsMap.put(WebWidgetValue, "WebWidgetValue");
Expand Down Expand Up @@ -590,6 +593,7 @@ public static boolean isStateManagementTag(Tag<?> tag) {
put(WebWidgetName, Group.WebDriver);
put(WebWidgetTagName, Group.WebDriver);
put(WebWidgetTextContent, Group.WebDriver);
put(WebWidgetInnerText, Group.WebDriver);
put(WebWidgetTitle, Group.WebDriver);
put(WebWidgetHref, Group.WebDriver);
put(WebWidgetValue, Group.WebDriver);
Expand Down
2 changes: 1 addition & 1 deletion core/src/org/testar/monkey/alayer/Verdict.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
package org.testar.monkey.alayer;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

Expand Down Expand Up @@ -104,6 +103,7 @@ public enum Severity {
/** CRITICAL (0.9 - 1.0) **/

CRITICAL(0.9, "CRITICAL"),
LLM_INVALID(0.91, "LLM_INVALID"), // LLM detected objective steps were followed but resulting state is invalid
NOT_RESPONDING(0.99999990, "NOT_RESPONDING"), // Unresponsive
UNEXPECTEDCLOSE(0.99999999, "UNEXPECTEDCLOSE"), // Crash? Unexpected close?
FAIL(1.0, "FAIL");
Expand Down
58 changes: 58 additions & 0 deletions testar/resources/prompts/fewshot_gemini_oracle_goal_status.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
[
{
"role": "user",
"parts": [
{
"text": "You are a web test agent. You will receive a test objective and a list of widgets that describe the current application state. Decide one verdict only: CONTINUE (goal not met yet), COMPLETED (goal met correctly), or INVALID (the expected end result is contradicted by the current state, indicating a functional issue). Use INVALID only when there is clear evidence that the expected result should be present but the observed state indicates an incorrect behavior. If evidence is not sufficient, choose CONTINUE. Respond in JSON only with this schema: {\"status\":\"CONTINUE|COMPLETED|INVALID\",\"info\":\"short reason\"}."
}
]
},
{
"role": "user",
"parts": [
{
"text": "We are testing the \"Demo\" web application. The objective of the test is to log in with username \"John\" and password \"Smith\". The current state of the application contains the widgets: Widget: Customer Login, Widget: Username, Widget: Password, Widget: Forgot login info?. Is the test objective met in this state?"
}
]
},
{
"role": "model",
"parts": [
{
"text": "{\"status\":\"CONTINUE\",\"info\":\"login form is still present and no successful session is visible\"}"
}
]
},
{
"role": "user",
"parts": [
{
"text": "We are testing the \"Demo\" web application. The objective of the test is to log in with username \"John\" and password \"Smith\". The current state of the application contains the widgets: Widget: Welcome John Smith, Widget: Account Services, Widget: Log Out. Is the test objective met in this state?"
}
]
},
{
"role": "model",
"parts": [
{
"text": "{\"status\":\"COMPLETED\",\"info\":\"user is authenticated and the expected welcome message is visible\"}"
}
]
},
{
"role": "user",
"parts": [
{
"text": "We are testing the \"Demo\" web application. The objective of the test is to log in with valid credentials username \"John\" and password \"Smith\" and verify successful authentication. The current state of the application contains the widgets: Widget: Login failure, Widget: Invalid Credentials. Is the test objective met in this state?"
}
]
},
{
"role": "model",
"parts": [
{
"text": "{\"status\":\"INVALID\",\"info\":\"The objective expects successful authentication with valid credentials, but the state shows a contradictory login failure\"}"
}
]
}
]
30 changes: 30 additions & 0 deletions testar/resources/prompts/fewshot_openai_oracle_goal_status.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
[
{
"role": "system",
"content": "You are a web test agent. You will receive a test objective and a list of widgets that describe the current application state. Decide one verdict only: CONTINUE (goal not met yet), COMPLETED (goal met correctly), or INVALID (the expected end result is contradicted by the current state, indicating a functional issue). Use INVALID only when there is clear evidence that the expected result should be present but the observed state indicates an incorrect behavior. If evidence is not sufficient, choose CONTINUE. Respond in JSON only with this schema: {\"status\":\"CONTINUE|COMPLETED|INVALID\",\"info\":\"short reason\"}."
},
{
"role": "user",
"content": "We are testing the \"Demo\" web application. The objective of the test is to log in with username \"John\" and password \"Smith\". The current state of the application contains the widgets: Widget: Customer Login, Widget: Username, Widget: Password, Widget: Forgot login info?. Is the test objective met in this state?"
},
{
"role": "assistant",
"content": "{\"status\":\"CONTINUE\",\"info\":\"The login form is still present and no successful session is visible\"}"
},
{
"role": "user",
"content": "We are testing the \"Demo\" web application. The objective of the test is to log in with username \"John\" and password \"Smith\". The current state of the application contains the widgets: Widget: Welcome John Smith, Widget: Account Services, Widget: Log Out. Is the test objective met in this state?"
},
{
"role": "assistant",
"content": "{\"status\":\"COMPLETED\",\"info\":\"The John user is authenticated and the expected welcome message is visible\"}"
},
{
"role": "user",
"content": "We are testing the \"Demo\" web application. The objective of the test is to log in with valid credentials username \"John\" and password \"Smith\" and verify successful authentication. The current state of the application contains the widgets: Widget: Login failure, Widget: Invalid Credentials. Is the test objective met in this state?"
},
{
"role": "assistant",
"content": "{\"status\":\"INVALID\",\"info\":\"The objective expects successful authentication with valid credentials, but the state shows a contradictory login failure\"}"
}
]
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import org.testar.SutVisualization;
import org.testar.action.priorization.llm.LlmActionSelector;
import org.testar.llm.LlmTestGoal;
import org.testar.llm.LlmTestGoalOrchestrator;
import org.testar.llm.prompt.OracleWebPromptGenerator;
import org.testar.llm.prompt.ActionWebPromptGenerator;
import org.testar.managers.InputDataManager;
Expand All @@ -51,20 +52,18 @@

import static org.testar.monkey.alayer.Tags.Blocked;
import static org.testar.monkey.alayer.Tags.Enabled;
import static org.testar.monkey.alayer.webdriver.Constants.scrollArrowSize;
import static org.testar.monkey.alayer.webdriver.Constants.scrollThick;

public class Protocol_03_webdriver_llm_parabank extends WebdriverProtocol {

// The LLM Action selector needs to be initialized with the settings
private LlmActionSelector llmActionSelector;
private List<LlmTestGoal> testGoals = new ArrayList<>();
private Queue<LlmTestGoal> testGoalQueue;
private LlmTestGoal currentTestGoal;

// The LLM Oracle needs to be initialized with the settings
private LlmOracle llmOracle;

private List<LlmTestGoal> testGoals = new ArrayList<>();
private LlmTestGoalOrchestrator testGoalOrchestrator;

/**
* Called once during the lifetime of TESTAR
* This method can be used to perform initial setup work
Expand All @@ -83,6 +82,11 @@ protected void initialize(Settings settings) {

// Initialize the LlmOracle using the LLM settings
llmOracle = new LlmOracle(settings, new OracleWebPromptGenerator());

testGoalOrchestrator = new LlmTestGoalOrchestrator(testGoals, (goal, appendPreviousGoal) -> {
llmActionSelector.reset(goal, appendPreviousGoal);
llmOracle.reset(goal, appendPreviousGoal);
});
}

/**
Expand All @@ -91,16 +95,7 @@ protected void initialize(Settings settings) {
@Override
protected void preSequencePreparations() {
super.preSequencePreparations();

// Setup test goal queue
testGoalQueue = new LinkedList<>();
testGoalQueue.addAll(testGoals);
currentTestGoal = testGoalQueue.poll();

// Reset llm action selector
llmActionSelector.reset(currentTestGoal, false);
// Reset llm oracle
llmOracle.reset(currentTestGoal, false);
testGoalOrchestrator.startSequence();
}

private void setupTestGoals(List<String> testGoalsList) {
Expand Down Expand Up @@ -162,28 +157,12 @@ protected State getState(SUT system) throws StateBuildException {
*/
@Override
protected List<Verdict> getVerdicts(State state) {
// Use the LLM as an Oracle to determine if the test goal has been completed
List<Verdict> llmVerdicts = llmOracle.getVerdicts(state);

for(Verdict llmVerdict : llmVerdicts) {
if(llmVerdict.severity() == Verdict.Severity.LLM_COMPLETE.getValue()) {
// Test goal was completed, retrieve next test goal from queue.
currentTestGoal = testGoalQueue.poll();

// Poll returns null if there are no more items remaining in the queue.
if(currentTestGoal == null) {
// No more test goals remaining, terminate sequence.
System.out.println("Test goal completed, but no more test goals.");
return Collections.singletonList(llmVerdict);
} else {
System.out.println("Test goal completed, moving to next test goal.");
llmActionSelector.reset(currentTestGoal, true);
llmOracle.reset(currentTestGoal, true);
}
}
}
List<Verdict> verdicts = super.getVerdicts(state);

// Add the LLM Oracle verdicts to determine if the test goal has been completed
verdicts.addAll(testGoalOrchestrator.processGoalVerdicts(llmOracle.getVerdicts(state)));

return super.getVerdicts(state);
return verdicts;
}

/**
Expand Down Expand Up @@ -229,9 +208,6 @@ protected Set<Action> deriveActions(SUT system, State state) throws ActionBuildE
continue;
}

// slides can happen, even though the widget might be blocked
//addSlidingActions(actions, ac, scrollArrowSize, scrollThick, widget);

// If the element is blocked, Testar can't click on or type in the widget
if (widget.get(Blocked, false) && !widget.get(WdTags.WebIsShadow, false)) {
continue;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ As a user, I want to log in with the credentials john/demo
Then the Welcome John Smith message is shown
;
As a user, I want to request a loan with an amount '999999', a down payment of '100', for the account '54321'
Then the loan is denied due to insufficient funds
Then the loan is approved by the bank entity
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
As a user, I want to log in with the credentials john/demo
Then the Welcome John Smith message is shown
;
As a user, I want to request a loan with an amount '500', a down payment of '100', for the account '54321'
Then the loan is approved by the bank entity
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ DataStoreUser =
DataStorePassword =
DataStoreMode = none
ApplicationName = parabank
ApplicationVersion = loan_denied
ApplicationVersion = request_loan
ActionSelectionAlgorithm = random
StateModelStoreWidgets = false
ResetDataStore = false
Expand All @@ -173,11 +173,11 @@ LlmReasoning = minimal
LlmHostUrl = https://api.openai.com/v1/chat/completions
LlmAuthorizationHeader = Bearer %OPENAI_API%
LlmActionFewshotFile = prompts/fewshot_openai_action.json
LlmOracleFewshotFile = prompts/fewshot_openai_oracle.json
LlmOracleFewshotFile = prompts/fewshot_openai_oracle_goal_status.json
LlmTemperature = 0.2
LlmHistorySize = 5
LlmStateless = true
LlmTestGoals = As a user, I want to log in with the credentials john/demo\\nThen a Request Loan link is shown;As a user, I want to request a loan with an amount '999999', a down payment of '100', for the account '54321'\\nThen the loan is denied due to insufficient funds
LlmTestGoals = As a user, I want to log in with the credentials john/demo\\nThen a Request Loan link is shown;As a user, I want to request a loan with an amount '999999', a down payment of '100', for the account '54321'\\nThen the loan is approved by the bank entity

#################################################################
# WebDriver features
Expand Down
Loading
Loading