Skip to content

Commit a5559a3

Browse files
committed
[] pattern notation for specifying sequences of arguments
1 parent bd31502 commit a5559a3

3 files changed

Lines changed: 183 additions & 6 deletions

File tree

src/hyperbase/hyperedge.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -590,7 +590,7 @@ def arguments_with_role(self, argrole: str) -> list[Hyperedge]:
590590
argroles = connector.argroles()
591591
if len(argroles) > 0 and argroles[0] == "{":
592592
argroles = argroles[1:-1]
593-
argroles = argroles.replace(",", "")
593+
argroles = argroles.replace(",", "").replace("[", "").replace("]", "")
594594
for pos, role in enumerate(argroles):
595595
if role == argrole and pos < len(self) - 1:
596596
edges.append(self[pos + 1])
@@ -1129,7 +1129,8 @@ def normalise(self) -> Atom:
11291129
unordered = True
11301130
else:
11311131
unordered = False
1132-
ar = "".join(sorted(ar, key=lambda argrole: argrole_order[argrole]))
1132+
if "[" not in ar:
1133+
ar = "".join(sorted(ar, key=lambda argrole: argrole_order[argrole]))
11331134
if unordered:
11341135
ar = f"{{{ar}}}"
11351136
return self.replace_argroles(ar)
@@ -1156,7 +1157,7 @@ def is_pattern(self) -> bool:
11561157
- argument role matcher (unordered argument roles surrounded by curly brackets)
11571158
- functional patterns (var, atoms, lemma, ...)
11581159
"""
1159-
return self.is_wildcard() or "{" in self.argroles()
1160+
return self.is_wildcard() or "{" in self.argroles() or "[" in self.argroles()
11601161

11611162
def is_fun_pattern(self) -> bool:
11621163
return False

src/hyperbase/patterns/matcher.py

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -189,12 +189,30 @@ def _matches_atomic_pattern(edge: Hyperedge, atomic_pattern: Hyperedge) -> bool:
189189
e_argroles = e_role[1]
190190
if len(ap_argroles_posopt) > 0 and ap_argroles_posopt[0] == "{":
191191
ap_argroles_posopt = ap_argroles_posopt[1:-1]
192-
else:
192+
elif "[" not in ap_argroles_posopt:
193193
ap_argroles_posopt = ap_argroles_posopt.replace(",", "")
194194
if len(e_argroles) > len(ap_argroles_posopt):
195195
return False
196196
else:
197197
return ap_argroles_posopt.startswith(e_argroles) # type: ignore[no-any-return]
198+
# else: has [...] ordered subsequence brackets, fall through
199+
200+
# check [...] contiguity constraints
201+
if "[" in ap_argroles_posopt:
202+
i = 0
203+
while i < len(ap_argroles_posopt):
204+
if ap_argroles_posopt[i] == "[":
205+
j = ap_argroles_posopt.index("]", i)
206+
group = ap_argroles_posopt[i + 1 : j]
207+
if group not in e_argroles:
208+
return False
209+
i = j + 1
210+
else:
211+
i += 1
212+
# strip brackets for count checking below
213+
ap_argroles_posopt = ap_argroles_posopt.replace("[", "").replace(
214+
"]", ""
215+
)
198216

199217
ap_argroles_parts = ap_argroles_posopt.split(",")
200218
ap_posroles = ap_argroles_parts[0]
@@ -413,10 +431,14 @@ def match(
413431
if len(argroles_posopt) > 0 and argroles_posopt[0] == "{":
414432
match_by_order = False
415433
argroles_posopt = argroles_posopt[1:-1]
434+
elif "[" in argroles_posopt:
435+
match_by_order = False
416436
else:
417437
match_by_order = True
418-
argroles = argroles_posopt.split(",")[0]
419-
argroles_opt = argroles_posopt.replace(",", "")
438+
argroles = argroles_posopt.replace("[", "").replace("]", "").split(",")[0]
439+
argroles_opt = (
440+
argroles_posopt.replace("[", "").replace("]", "").replace(",", "")
441+
)
420442

421443
if len(argroles) > 0:
422444
min_len = 1 + len(argroles)

tests/test_patterns.py

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -584,6 +584,160 @@ def test_match_pattern_argroles_optionals2(self):
584584
{"X": hedge("hyperbase/Cp.s"), "Y": hedge("great/C"), "Z": hedge("today/J")}
585585
]
586586

587+
# [] ordered subsequence brackets -- is_pattern detection
588+
589+
def test_is_pattern_brackets1(self):
590+
assert hedge("go/Pd.[so]").is_pattern()
591+
592+
def test_is_pattern_brackets2(self):
593+
assert hedge("go/Pd.{[so]x}").is_pattern()
594+
595+
def test_is_pattern_brackets3(self):
596+
assert hedge("(go/Pd.[so] x/C y/C)").is_pattern()
597+
598+
# [] ordered subsequence brackets -- inside {}
599+
600+
def test_match_pattern_argroles_brackets1(self):
601+
"""[so] in {}: s,o must be contiguous in order, x anywhere"""
602+
assert match_pattern(
603+
"(is/Pd.sox hyperbase/Cp.s great/C today/C)",
604+
"(is/Pd.{[so]x} hyperbase/Cp.s *X *Y)",
605+
) == [{"X": hedge("great/C"), "Y": hedge("today/C")}]
606+
607+
def test_match_pattern_argroles_brackets2(self):
608+
"""[so] in {}: x can appear before the [so] group"""
609+
assert match_pattern(
610+
"(is/Pd.xso today/C hyperbase/Cp.s great/C)",
611+
"(is/Pd.{[so]x} hyperbase/Cp.s *X *Y)",
612+
) == [{"X": hedge("great/C"), "Y": hedge("today/C")}]
613+
614+
def test_match_pattern_argroles_brackets3(self):
615+
"""[so] in {}: fails when s,o are not contiguous (s_x_o)"""
616+
assert (
617+
match_pattern(
618+
"(is/Pd.sxo hyperbase/Cp.s today/C great/C)",
619+
"(is/Pd.{[so]x} hyperbase/Cp.s *X *Y)",
620+
)
621+
== []
622+
)
623+
624+
def test_match_pattern_argroles_brackets4(self):
625+
"""[so] in {}: fails when s,o are in wrong order (os)"""
626+
assert (
627+
match_pattern(
628+
"(is/Pd.osx great/C hyperbase/Cp.s today/C)",
629+
"(is/Pd.{[so]x} hyperbase/Cp.s *X *Y)",
630+
)
631+
== []
632+
)
633+
634+
def test_match_pattern_argroles_brackets5(self):
635+
"""[so] in {}: fails when required role x is missing"""
636+
assert (
637+
match_pattern(
638+
"(is/Pd.so hyperbase/Cp.s great/C)",
639+
"(is/Pd.{[so]x} hyperbase/Cp.s *X *Y)",
640+
)
641+
== []
642+
)
643+
644+
def test_match_pattern_argroles_brackets6(self):
645+
"""[so] in {}: extra roles are allowed"""
646+
assert match_pattern(
647+
"(is/Pd.xsoy today/C hyperbase/Cp.s great/C extra/C)",
648+
"(is/Pd.{[so]x} hyperbase/Cp.s *X *Y)",
649+
) == [{"X": hedge("great/C"), "Y": hedge("today/C")}]
650+
651+
def test_match_pattern_argroles_brackets7(self):
652+
"""larger bracket group [sor]"""
653+
assert match_pattern(
654+
"(is/Pd.sorx hyperbase/Cp.s great/C extra/C today/C)",
655+
"(is/Pd.{[sor]x} hyperbase/Cp.s *O *R *X)",
656+
) == [{"O": hedge("great/C"), "R": hedge("extra/C"), "X": hedge("today/C")}]
657+
658+
def test_match_pattern_argroles_brackets8(self):
659+
"""larger bracket group [sor] fails when not contiguous"""
660+
assert (
661+
match_pattern(
662+
"(is/Pd.soxr hyperbase/Cp.s great/C today/C extra/C)",
663+
"(is/Pd.{[sor]x} hyperbase/Cp.s *O *R *X)",
664+
)
665+
== []
666+
)
667+
668+
def test_match_pattern_argroles_brackets9(self):
669+
"""multiple bracket groups [so][xr]"""
670+
assert match_pattern(
671+
"(is/Pd.soxr hyperbase/Cp.s great/C today/C extra/C)",
672+
"(is/Pd.{[so][xr]} hyperbase/Cp.s *O *X *R)",
673+
) == [{"O": hedge("great/C"), "X": hedge("today/C"), "R": hedge("extra/C")}]
674+
675+
def test_match_pattern_argroles_brackets10(self):
676+
"""multiple bracket groups can appear in any order"""
677+
assert match_pattern(
678+
"(is/Pd.xrso today/C extra/C hyperbase/Cp.s great/C)",
679+
"(is/Pd.{[so][xr]} hyperbase/Cp.s *O *X *R)",
680+
) == [{"O": hedge("great/C"), "X": hedge("today/C"), "R": hedge("extra/C")}]
681+
682+
def test_match_pattern_argroles_brackets11(self):
683+
"""multiple bracket groups: fails when one group is not contiguous"""
684+
assert (
685+
match_pattern(
686+
"(is/Pd.soRx hyperbase/Cp.s great/C extra/C today/C)",
687+
"(is/Pd.{[so][xr]} hyperbase/Cp.s *O *X *R)",
688+
)
689+
== []
690+
)
691+
692+
def test_match_pattern_argroles_brackets_no_vars(self):
693+
"""bracket matching without variables"""
694+
assert match_pattern(
695+
"(is/Pd.sox hyperbase/Cp.s great/C today/C)",
696+
"(is/Pd.{[so]x} hyperbase/Cp.s * *)",
697+
) == [{}]
698+
699+
# [] ordered subsequence brackets -- outside {} (bare)
700+
701+
def test_match_pattern_argroles_bare_brackets1(self):
702+
"""[so] outside {}: matches exact argroles"""
703+
assert match_pattern(
704+
"(is/Pd.so hyperbase/Cp.s great/C)", "(is/Pd.[so] hyperbase/Cp.s *X)"
705+
) == [{"X": hedge("great/C")}]
706+
707+
def test_match_pattern_argroles_bare_brackets2(self):
708+
"""[so] outside {}: matches with extras before"""
709+
assert match_pattern(
710+
"(is/Pd.xso today/C hyperbase/Cp.s great/C)",
711+
"(is/Pd.[so] hyperbase/Cp.s *X)",
712+
) == [{"X": hedge("great/C")}]
713+
714+
def test_match_pattern_argroles_bare_brackets3(self):
715+
"""[so] outside {}: matches with extras after"""
716+
assert match_pattern(
717+
"(is/Pd.sox hyperbase/Cp.s great/C today/C)",
718+
"(is/Pd.[so] hyperbase/Cp.s *X)",
719+
) == [{"X": hedge("great/C")}]
720+
721+
def test_match_pattern_argroles_bare_brackets4(self):
722+
"""[so] outside {}: fails when reversed"""
723+
assert (
724+
match_pattern(
725+
"(is/Pd.os great/C hyperbase/Cp.s)",
726+
"(is/Pd.[so] hyperbase/Cp.s *X)",
727+
)
728+
== []
729+
)
730+
731+
def test_match_pattern_argroles_bare_brackets5(self):
732+
"""[so] outside {}: fails when not contiguous"""
733+
assert (
734+
match_pattern(
735+
"(is/Pd.sxo hyperbase/Cp.s today/C great/C)",
736+
"(is/Pd.[so] hyperbase/Cp.s *X)",
737+
)
738+
== []
739+
)
740+
587741
def test_match_pattern_match_connectors1(self):
588742
assert match_pattern(
589743
"(is/P hyperbase/Cp.s great/C)", "(PRED/P hyperbase/Cp.s X ...)"

0 commit comments

Comments
 (0)