# Constraint Grammar rule file: set definitions followed by rule sections for
# morphological disambiguation, syntactic mapping, corrections, information
# extraction, anaphora relations, grammar checking, machine-translation
# support and dependency attachment.
#
# Layout: one statement per line. '#' comments run to the end of the physical
# line, so a statement must never share a line with a preceding comment.
#
# NOTE(review): several statements below contain empty or truncated tokens
# (e.g. `()`, `( N)`, `""`, a LIST without a name). They look like residue of
# angle-bracket tags that were stripped in transit -- TODO restore them from
# the upstream grammar. They are reproduced verbatim here, not reconstructed.

DELIMITERS = "<$.>" "$." "<$!>" "<$?>" "<$\;>" "<$¶>" ; # sentence window
# @ for syntax or morphological disambiguation, % or § for higher-level
# annotation with input that already contains @ tags
MAPPING-PREFIX = @ ;

SETS

LIST >>> = >>> ;
LIST ALL = N PROP ADJ DET PERS SPEC ADV V PRP KS KC IN ; # all word classes (but not punctuation)
LIST NOMINAL = N PROP ADJ (PCP2 STA) ; # nominals, i.e. potential nominal heads
LIST N = N ;
# NOTE(review): the next two LISTs appear to have lost tokens (the second has no set name) -- confirm against upstream
LIST N-HUM = r ( ) ("an" <*> N P) ;
LIST = FEM (".*ess"r r) "girl" "wife" "woman" ;
LIST ADJ = ADJ ;
LIST DET = DET ;
LIST ART = ART ;
LIST PRE-N = ART DET NUM ADJ STA ; # prenominals
SET NON-PRE-N = (*) - PRE-N ;
SET NON-PRE-N/ADV = (*) - PRE-N - (ADV) ;
LIST P = P S/P ; # plural
# plural prenominals, equivalent to (ART DEF) (DET P) (DET S/P) (NUM P) ADJ (PCP2 STA) ;
SET PRE-N-P = PRE-N + P ;
LIST VFIN = PR PAST IMPF IMP ;
LIST INF = INF ;
# NOTE(review): INP below may be a typo for IMP (compare LIST VFIN and LIST IMP) -- confirm before changing
LIST VV = PR PAST IMPF INP INF AKT PAS &AUX &MV ;
LIST PRP = PRP ;
LIST ADV = ADV ;
SET NON-ADV = (*) - (ADV) ;
LIST CLB = "<$,>" KS ; # clause boundaries
LIST KOMMA = "<$,>" ;
LIST V-SPEAK = "answer" "say" "tell" ; # speech verbs
LIST @MV = @FMV @IMV &MV ; # main verbs
LIST &MV = &MV ;
LIST &AUX = &AUX ;
# NOTE(review): the next statement looks like two LIST definitions fused together with tags missing (note the second `=`) -- confirm against upstream
LIST @FUNC = @SUBJ> @ @ @P< @ = @SUBJ> @F-SUBJ> @ACC> @DAT> @SC> @OC> @ADVL> @SA> @OA> @PIV> ;
LIST @FS- = @FS-SUBJ> @FS-ACC> @FS-DAT> @FS-SC> @FS-OC> @FS-ADVL> @FS-SA> @FS-OA> ;
LIST @ICL- = @ICL-SUBJ> @ICL-ACC> @ICL-DAT> @ICL-SC> @ICL-OC> @ICL-ADVL> @ICL-SA> @ICL-OA> ;
LIST IMP = IMP ;
LIST PERS = PERS ;
LIST PROP = PROP ( N) ;
LIST NUM = NUM ;
SET @FS/ICL = @FS- OR @ICL- OR (@FS-N<) OR (@FS-P<) ;

CONSTRAINTS

### noun phrase
SELECT (ADJ) (*1 N BARRIER (*) - (ADJ)) ;
SELECT (N) (*-1 (ART) BARRIER NON-PRE-N) ;
REMOVE (N) (-1 (PERS NOM)) ;
REMOVE (N NOM) (1C PRE-N) (0 VFIN) ;
# NOTE(review): the leading "" on this and later rules looks like an emptied word-form prefix -- confirm against upstream
"" SELECT NUM (-1 N OR PROP) ;

### verbs
REMOVE (IMP) OR (INF) (-1 (PERS)) ;
REMOVE IMP (NOT -1 >>>) (NEGATE -1 KOMMA LINK -1 PROP OR ADV LINK -1 >>>) ;
"" SELECT PERS (NOT -1 N OR PROP) (1 VFIN) ;
SELECT (PAS) (*-1 ("be") BARRIER (*) - ADV) ;

### prepositions vs. adverbs
"" REMOVE ADV (0 PRP) (*1C PRE-N OR N) ;

# remove a plural noun reading if there is a safe prenominal to the left that
# is not compatible with a plural reading
REMOVE (N P) IF (-1C PRE-N) (NOT -1 PRE-N-P) ;

# remove finite verb and infinitive readings if there is an article to the left
REMOVE VFIN OR INF (-1C ART OR (GEN)) ;
REMOVE VFIN OR INF (*-1C ART OR (GEN) BARRIER NON-PRE-N/ADV) ;

# remove a finite verb reading if there are two more finite verbs to the
# right, neither of them barred by a clause boundary (CLB) or a coordinating
# conjunction (KC).
REMOVE VFIN IF (*1 VFIN BARRIER CLB OR (KC) LINK *1 VFIN BARRIER CLB OR (KC)) ;

# select the conjunction reading for the word form 'que', if there is a
# speech-verb to the left with nothing but adverbs in between.
# NOTE(review): the rule as written has no word-form restriction (only an empty ""); the 'que' in this comment is not reflected in the code -- confirm
"" SELECT (KS) (*-1 V-SPEAK BARRIER ALL - (ADV)) ;

CORRECTIONS

SUBSTITUTE (S/P) (P) TARGET (DET) (1C (P)) ;

SECTION

ADD (&AUX) TARGET ("be") (*1 (PCP2 PAS) BARRIER VV) ;
ADD (&AUX) TARGET ("have") (*1 (PCP2 AKT) BARRIER VV) ;
# NOTE(review): empty TARGET () -- a tag appears to be missing
ADD (&AUX) TARGET () (*1 (INF) BARRIER (INFM)) ;
ADD (&MV) TARGET (PCP2 PAS) OR (PCP2 AKT) OR VFIN OR (INF) (NOT 0 (&AUX)) ;

MAP (@P< @>N) TARGET (N NOM) (*-1C PRP BARRIER NON-PRE-N/ADV) ;
MAP (@SUBJ>) TARGET (NOM) OR (INDP) OR (DET) (*1 VFIN BARRIER NON-ADV) ;
#MAP (@SUBJ> @ACC>) TARGET (N NOM) (NOT *-1 NON-PRE-N/ADV) (*1C VFIN) ;
#MAP (@N) TARGET (N GEN) ;
MAP (@>N) TARGET PRE-N (*1C N BARRIER NON-PRE-N) ;
# NOTE(review): the mapping tags in the next rule look truncated
MAP (@ @N) TARGET (N NOM) ;

CONSTRAINTS

# remove a forward subject if there's no finite verb to the right
REMOVE (@SUBJ>) IF (NOT *1 VFIN) ;
# remove a forward subject if a clause boundary is found to the right before
# any finite verb
REMOVE (@SUBJ>) IF (*1 CLB BARRIER VFIN) ;
REMOVE (N NOM @>N) (NOT 1 N) ;
SELECT N + $$@FUNC (-1 KOMMA) (-2C $$@FUNC) (1 KOMMA OR ("and") OR ("or") LINK 1 $$@FUNC) ;
SELECT N + $$@FUNC (-1 ("and") OR ("or")) (-2C $$@FUNC) ;
# NOTE(review): the next rule appears to be several statements fused with tokens missing -- confirm against upstream
SUBSTITUTE (N) ( N) TARGET $$@ ( DET) NUM ;
#SUBSTITUTE (N) ( N) TARGET (N NOM) (*-1 DEF-EDGE BARRIER NON-PRE-N/ADV) ;
#SUBSTITUTE (N) ( N) TARGET (N NOM) (c DEF-EDGE) ;

###### complex tense ###
SUBSTITUTE (V) ( V) TARGET (INF @ICL-AUX<) (p VV LINK 0 ("will") OR ("shall")) ;

### Information Extraction / Retrieval
ADD (%LOC) TARGET ( @P<) (*-1 PRP LINK 0 ("in") LINK 0 (@)) ; # Place: in
ADD (%LOC-TMP) TARGET (NUM @P<) (-1 ("in" PRP)) ; # time: in 1998

### Anaphora
# NOTE(review): `LINK NOT 0 LINK` below is missing its set, and the parentheses in the first two rules look unbalanced -- confirm
SETRELATION (ref) TARGET ("he") (0 (NOM) OR (ACC)) TO (**-1WA (@SUBJ>) + N-HUM LINK 0 (S) LINK NOT 0 LINK NEGATE *1 @FS-)) ;
SETRELATION (ref) TARGET ("they") (0 (NOM) OR (ACC)) TO (**-1WA (@SUBJ>) + (N P) LINK NEGATE *1 @FS-)) ;
SETRELATION (ref) TARGET ("they" @>N) TO (**-1W (N P) + @FUNC) ;

### Grammar checking
ADD (§3S) TARGET (V -3S) OR (INF) OR (IMP) (*-1C (S NOM) OR (3S NOM) BARRIER NON-ADV LINK *-1 VV OR CLB OR >>> BARRIER (KC) LINK NOT 0 ()) ;

### Machine translation
###### Add person/number to English verbs for translation into Romance or Slavic languages
LIST PN = 1S 2S 3S 1P 2P 3P ;
SUBSTITUTE (-3S) (1S) TARGET VFIN (c (@SUBJ>) LINK cS (1S)) ; # covers both 'I' and 'my aunt'
SUBSTITUTE (-3S) $$PN TARGET VFIN (c (@SUBJ>) LINK cS $$PN) ; # generalisation
SUBSTITUTE (IMPF) (IMPF) + $$PN TARGET VFIN (c (@SUBJ>) LINK cS $$PN) ; # generalisation
SUBSTITUTE (IMPF) (IMPF 3P) TARGET VFIN (c (@SUBJ>) LINK 0 (P) OR (r)) ;

##### handle polysemy or usage distinctions
ADD (§DE-TRANSLATION:Aktien-) TARGET ("share" N NOM @>N) ; # share holder, share price
#ADD (§DE-TRANSLATION:Aktien-) TARGET ("share" N NOM) (p N) ;
ADD (§DE-TRANSLATION:Anteil) TARGET ("share" N S) (-1 ()) ; # get one's share

### moving dependency tree sections
#MOVE WITHCHILD (*) TARGET (@>>) ;

### np ###
SETPARENT (@>A) TO (*1 ADJ OR (STA) OR ADV) ;
SETPARENT (@>N) TO (*1 N OR @FUNC BARRIER (*) - (@>N) - (@>A) - (@CO)) ;
# circularity-check will make superfluous a LINK NOT 0 @>N
SETPARENT (@N<) TO (*-1 N OR ADJ OR DET) ;
# NOTE(review): the next two rules look truncated/fused (unbalanced parentheses, missing targets) -- confirm against upstream
SETPARENT (@N - (@SUBJ>) OR @FS-FUNC> OR @ICL-FUNC> OR (@SUB) TO (*1 &MV) ;
SETPARENT @)) (NONE p (V)) ;
SETPARENT (@FS-N<) TO (**-1 @FUNC) (NOT 0 CLB) ;
SETPARENT (@FS-N<) TO (*-1 (@FS-N<) BARRIER VFIN OR CLB) ;
# or: LINK -1 (*) LINK *1 &MV) ;
SETPARENT (@SUBJ>) (NOT p (V)) TO (*1 &MV) ;

### chaining conjuncts
# NOTE(review): the rule below is cut off at this point in the visible text and presumably continues on the next line of the file; reproduced verbatim, do not append a comment to it
SETPARENT $$@N) OR (@>A) OR (@APP) OR (@N