Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 17 additions & 5 deletions core/src/main/antlr4/org/evomaster/core/parser/RegexJava.g4
Original file line number Diff line number Diff line change
Expand Up @@ -189,15 +189,23 @@ patternCharacter
| BRACE_close
| BRACKET_close
| COLON
| DOUBLE_AMPERSAND // char class intersection not supported by default in JS, only supported if "v" flag is turned on.
;


characterClass
//TODO check if lookahead needed, or implicit in rule order resolution
//[ [lookahead ∉ {^}] ClassRanges ]
: BRACKET_open CARET classRanges BRACKET_close
| BRACKET_open classRanges BRACKET_close
;
: BRACKET_open CARET classContents BRACKET_close
| BRACKET_open classContents BRACKET_close
;

// Contents of a bracketed character class: one classUnion, optionally followed by
// further classUnion operands, each introduced by DOUBLE_AMPERSAND ('&&').
// Example: the `a-z&&[aeiou]` part of `[a-z&&[aeiou]]`.
classContents
: classUnion (DOUBLE_AMPERSAND classUnion)*
;

// A single operand of classContents: either a run of one or more nested
// characterClass nodes, or a plain classRanges.
classUnion
: characterClass+ // one or more nested classes = UNION
| classRanges // bare ranges
;

classRanges
:
Expand Down Expand Up @@ -261,6 +269,10 @@ atomEscape
//------ LEXER ------------------------------
// Lexer rules have first letter in upper-case

// Two-character '&&' token; used by the parser rules as the separator between
// character-class intersection operands.
DOUBLE_AMPERSAND
: '&&'
;

DecimalDigit
: [0-9]
;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package org.evomaster.core.parser
import org.evomaster.core.search.gene.Gene
import org.evomaster.core.search.gene.regex.*
import org.evomaster.core.utils.CharacterRange
import org.evomaster.core.utils.MultiCharacterRange
import org.evomaster.core.utils.ParsedFlagExpression
import org.evomaster.core.utils.RegexFlags

Expand Down Expand Up @@ -398,11 +399,44 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor<VisitResult>(){

val negated = ctx.CARET() != null

val ranges = ctx.classRanges().accept(this).data as List<CharacterRange>
val innerMultiCharRanges = ctx.classContents().accept(this).data as MultiCharacterRange

val gene = CharacterRangeRxGene(negated, ranges, currentFlags)
val multiCharRanges = MultiCharacterRange(negated, innerMultiCharRanges)

return VisitResult(gene)
return if (ctx.parent is RegexJavaParser.AtomContext){
// top level character class, create gene
VisitResult(CharacterRangeRxGene(multiCharRanges, currentFlags))
} else {
// nested char class, set MultiCharacterRange as data
VisitResult(data = multiCharRanges)
}
}

/**
 * Visits a `classContents` node.
 *
 * Every `classUnion` child is visited and its result data is read as a
 * [MultiCharacterRange]; the operands are then combined left to right with
 * [MultiCharacterRange.intersect].
 *
 * @param ctx the parser context of the `classContents` rule
 * @return a [VisitResult] whose `data` is the combined [MultiCharacterRange]
 */
override fun visitClassContents(ctx: RegexJavaParser.ClassContentsContext): VisitResult {
    val operands = ctx.classUnion().map { unionCtx ->
        unionCtx.accept(this).data as MultiCharacterRange
    }

    // reduce (rather than fold) on purpose: there is no neutral starting element here
    val intersection = operands.reduce { soFar, next ->
        MultiCharacterRange.intersect(soFar, next)
    }

    return VisitResult(data = intersection)
}

/**
 * Visits a `classUnion` node.
 *
 * When the node has nested `characterClass` children, each one is visited and the
 * resulting [MultiCharacterRange]s are merged left to right with
 * [MultiCharacterRange.union]. Otherwise the single `classRanges` child is visited
 * and its ranges are wrapped in a non-negated [MultiCharacterRange].
 *
 * @param ctx the parser context of the `classUnion` rule
 * @return a [VisitResult] whose `data` is the resulting [MultiCharacterRange]
 */
override fun visitClassUnion(ctx: RegexJavaParser.ClassUnionContext): VisitResult {
    val nestedClasses = ctx.characterClass()

    if (nestedClasses.isEmpty()) {
        // no nested classes: this alternative is a single classRanges
        val plainRanges = ctx.classRanges().accept(this).data as List<CharacterRange>
        return VisitResult(data = MultiCharacterRange(false, plainRanges))
    }

    // one or more nested classes: merge them all together
    val merged = nestedClasses
        .map { nested -> nested.accept(this).data as MultiCharacterRange }
        .reduce { soFar, next -> MultiCharacterRange.union(soFar, next) }

    return VisitResult(data = merged)
}

override fun visitClassRanges(ctx: RegexJavaParser.ClassRangesContext): VisitResult {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ import org.evomaster.core.utils.MultiCharacterRange
import org.evomaster.core.utils.RegexFlags
import org.slf4j.LoggerFactory

class CharacterRangeRxGene private constructor(
class CharacterRangeRxGene(
/**
* this represents the valid ranges for a character class, removing overlaps and applying negation
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,14 @@ class MultiCharacterRange internal constructor(val ranges: List<CharacterRange>)
return MultiCharacterRange(negated, characters.map { CharacterRange(it, it) })
}

/**
 * Factory that optionally negates an existing [MultiCharacterRange].
 *
 * @param negated when `false`, [multiCharRange] is returned unchanged; when `true`,
 *   a new instance is built by passing `true` and `multiCharRange.ranges` to the
 *   `(Boolean, List<CharacterRange>)` factory
 * @param multiCharRange the ranges to (optionally) negate
 */
operator fun invoke(negated: Boolean, multiCharRange: MultiCharacterRange): MultiCharacterRange =
    if (!negated) multiCharRange else MultiCharacterRange(true, multiCharRange.ranges)

operator fun invoke(negated: Boolean, ranges: List<CharacterRange>): MultiCharacterRange {
if (ranges.isEmpty()) {
throw IllegalArgumentException("No defined ranges")
Expand Down Expand Up @@ -93,6 +101,87 @@ class MultiCharacterRange internal constructor(val ranges: List<CharacterRange>)
}
}.toMutableList()
}

/**
 * Computes the intersection of two [MultiCharacterRange] instances.
 * Used to support character class intersections (e.g.: `[a-z0-9&&[0-9A-Z]]`).
 *
 * Both inputs are walked in parallel with one cursor each; every non-empty overlap
 * between the two current ranges is emitted, then the cursor whose range ends first
 * is advanced.
 *
 * NOTE(review): this walk assumes the ranges of [a] and [b] are sorted in ascending
 * order and do not overlap each other — confirm against how instances are built.
 *
 * @param a first operand
 * @param b second operand
 * @return a [MultiCharacterRange] holding the overlapping ranges (possibly none)
 */
fun intersect(a: MultiCharacterRange, b: MultiCharacterRange): MultiCharacterRange {
    val overlaps = mutableListOf<CharacterRange>()

    var i = 0
    var j = 0

    while (i < a.size && j < b.size) {
        val left = a[i]
        val right = b[j]

        val lo = maxOf(left.start, right.start)
        val hi = minOf(left.end, right.end)

        if (lo <= hi) {
            overlaps.add(CharacterRange(lo, hi))
        }

        // move past whichever range finishes first; on a tie the cursor over b moves
        if (left.end < right.end) {
            i++
        } else {
            j++
        }
    }

    return MultiCharacterRange(overlaps)
}

/**
 * Creates a union from two [MultiCharacterRange] instances, merging overlapping
 * and adjacent ranges into a single normalized [MultiCharacterRange].
 * Used to allow character class unions (e.g.: `[[a-c][x-z]]`).
 *
 * The two range lists are consumed in order of ascending range start (ties take the
 * range from [a]); each range is either merged into the last emitted range or
 * appended as a new one.
 *
 * NOTE(review): this merge assumes the ranges of [a] and [b] are each already sorted
 * by start — confirm against how instances are built.
 *
 * @param a first operand
 * @param b second operand
 * @return a [MultiCharacterRange] covering every character covered by [a] or [b]
 */
fun union(a: MultiCharacterRange, b: MultiCharacterRange): MultiCharacterRange {
    val result = mutableListOf<CharacterRange>()

    // Appends `next`, or widens the last emitted range when `next` overlaps it or
    // starts right after it. The last element is overwritten by index instead of
    // using removeLast(), which binds to java.util.List.removeLast() when compiled
    // against JDK 21 and is then missing on older runtimes.
    fun appendMerging(next: CharacterRange) {
        val last = result.lastOrNull()
        if (last != null && next.start.code <= last.end.code + 1) {
            result[result.lastIndex] = CharacterRange(last.start, maxOf(last.end, next.end))
        } else {
            result.add(next)
        }
    }

    var idxA = 0
    var idxB = 0

    while (idxA < a.size || idxB < b.size) {
        // take from a when b is exhausted, or when both have ranges left and a's
        // current range does not start after b's
        val takeFromA = idxB >= b.size ||
            (idxA < a.size && a[idxA].start <= b[idxB].start)

        appendMerging(if (takeFromA) a[idxA++] else b[idxB++])
    }

    return MultiCharacterRange(result)
}
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -251,4 +251,27 @@ class GeneRegexJavaVisitorTest : GeneRegexEcma262VisitorTest() {
override fun testJSExclusiveEscapes() {
// JS exclusive
}

/**
 * Runs `checkSameAsJava` on a fixed list of regexes that use nested character
 * classes, `&&` inside character classes, and a bare `&&` outside any class.
 * Patterns are checked in the listed order.
 */
@Test
fun testCharClassIntersectionSubtractionAndNesting() {
    val patterns = listOf(
        "[abc-e[f-h]ij-l[m]n]",
        "[a&&a][a&&a&&a]",
        "[a-z&&[aeiou]]",
        "[a-z&&[^aeiou]]",
        "[a-z&&[a-p]&&[f-z]]",
        "[ac-e&&[a-d]]",
        "[\\w&&[a-z]]",
        "[a-z&&[b-y]]",
        "[a-z0-9&&[A-Z0-9]&&[2B4C]]",
        "[[a-c][x-z]&&[b-y]]",
        "[a-c&&[b-d]e-g]",
        "[^a-z&&[^aeiou]]",
        "[\\s&&[^\\n]]",
        "[a-c&&[c-e]]",
        "[a-z&&[a-z]]",
        "[a-ce-g&&[b-f]]",
        "[[a-z&&[a-p]]&&[f-z]]",
        "[a[b[c[d&&[\\w]]]][0-7&&\\d&&[0-5]&&1-5]]",
        "&&"
    )

    for (pattern in patterns) {
        checkSameAsJava(pattern)
    }
}
}
Loading