# Example Constraint Grammar (VISL CG syntax).
#
# Layout: one statement per line. A '#' starts a comment that runs to the end
# of the physical line, so statements must never share a line with a preceding
# comment -- otherwise they are silently commented out.
#
# NOTE(review): this file looked like it had passed through an HTML tag
# stripper: secondary tags written in angle brackets were missing, leaving an
# empty LIST and empty "()" tag groups. They are restored below using VISL-style
# tag names and flagged individually -- confirm them against the tagset your
# morphological analyzer actually emits.

# Sentence boundaries: a window ends at any of these word forms.
DELIMITERS = "<$.>" "<$!>" "<$?>" "<$\;>" "<$:>" "<$-->" "<$¶>" "<$start>" "<$START>" ;

SETS

# Left boundary (sentence start).
LIST >>> = >>> ;
# Right boundary (sentence end).
LIST <<< = <<< ;

# All word classes.
LIST ALL = N PROP ADJ V PERS INDP SPEC DET ART PRP KC KS NUM IN INFM ;

# Article. ART has to be defined as a set because the Portuguese analyzer
# treats the category as a secondary category of determiner pronouns.
# NOTE(review): if the analyzer marks articles with a secondary <...> tag, that
# tag may have been lost from this list as well -- verify.
LIST ART = ART ;

# Quantifiers.
# NOTE(review): the list was empty; <quant> restored -- confirm the tag name.
LIST QUANT = <quant> ;

# Quantifiers, but only determiner pronouns (i.e. not adverbs).
SET DET-QUANT = QUANT + (DET) ;

# Finite verb: tagged directly (VFIN), or via a tense tag (PR, IMPF, FUT),
# conditional (COND) or imperative (IMP).
LIST VFIN = VFIN PR IMPF FUT COND IMP ;

# Attributive word classes (adjective, participle, numeral).
# Note that PCP is a subclass of V (verb).
LIST ATTR = ADJ PCP PCP1 PCP2 NUM ;

# Example of a "negative" set: everything BUT attributive words.
SET NON-ATTR = ALL - ATTR ;

# Prenominal function tag.
LIST @>N = @>N ;
# Postnominal function tag.
LIST @N< = @N< ;

CORRECTIONS

# Re-tag participles as adjectives when they carry an adnominal function tag.
# Works only after parsing, since it relies on @>N / @N< being present.
SUBSTITUTE (V) (ADJ) TARGET (PCP) OR (PCP2) OR (PCP1)
    IF (0 @>N OR @N<) ;

# Same substitution, usable right after tagging, but only for certain
# prenominals: a safe noun to the right and a safe article/determiner to the left.
SUBSTITUTE (V) (ADJ) TARGET (PCP) OR (PCP2) OR (PCP1)
    IF (1C (N)) (-1C ART OR (DET)) ;

MAPPINGS

# Head of a definite noun phrase: the word immediately to the left is safely
# a definite article, a possessive, a demonstrative/definite determiner, or a
# genitive.
# NOTE(review): <poss>, <dem> and <def> were missing ("()" and two identical
# "( DET)" groups); restored names are assumptions -- confirm.
MAP (@definite_np) TARGET (N)
    IF (-1C (ART DEF) OR (<poss>) OR (<dem> DET) OR (<def> DET) OR (GEN)) ;

# Head of an indefinite noun phrase: scanning left over attributive words only
# (BARRIER NON-ATTR), there is a safe indefinite article, indefinite marker or
# determiner quantifier.
# NOTE(review): <idf> was missing ("()"); restored name is an assumption -- confirm.
MAP (@indefinite_np) TARGET (N)
    IF (*-1C (ART IDF) OR (<idf>) OR DET-QUANT BARRIER NON-ATTR) ;

# Verb order markers, e.g. for clause chunking. All three are offered on every
# finite verb; the REMOVE rules in the later CONSTRAINTS sections prune them.
MAP (@first_finite_verb @last_finite_verb @mid_finite_verb) TARGET VFIN ;

CONSTRAINTS

# Every sentence has at least one finite verb: if there is no finite-verb
# reading anywhere to the left or right, select the finite reading here. This
# indirectly REMOVEs all readings of the cohort that are NOT finite verbs.
SELECT VFIN (NOT *-1 VFIN) (NOT *1 VFIN) ;

# It cannot be a finite verb if there is a safe (C) article, possessive or
# genitive immediately (1) to the left (-). GEN is only relevant for Germanic
# languages. ART is referenced as a set here (see its definition under SETS).
# NOTE(review): <poss> was missing ("()"); restored per the comment above,
# exact tag name is an assumption -- confirm.
REMOVE VFIN (-1C ART OR (<poss>) OR (GEN)) ;

CONSTRAINTS

# Not the first finite verb if another finite verb occurs anywhere to the left.
REMOVE (@first_finite_verb) IF (*-1 VFIN) ;
# Not the last finite verb if another finite verb occurs anywhere to the right.
REMOVE (@last_finite_verb) IF (*1 VFIN) ;

CONSTRAINTS

# A verb still marked as first or last is not a "mid" finite verb.
REMOVE (@mid_finite_verb) (0 (@last_finite_verb) OR (@first_finite_verb)) ;

END