Skip to content

Commit ee4e35a

Browse files
committed
Support adjective forms as standalone words
fsa size grows to 7.4MB
1 parent 6541010 commit ee4e35a

File tree

3 files changed

+22
-6
lines changed

3 files changed

+22
-6
lines changed

ninfl/Makefile

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
include ../Makefile.inc
22

33
SUBDIRS = cases
4-
ninfl.a: plural.a adjective.a cases
4+
ninfl.a: plural.a adjective.a standalone-adjective.a cases
5+
standalone-adjective.a: adjective.a  # standalone-adjective.fst reads "<adjective.a>", so build that first
6+
57
.PHONY: all subdirs $(SUBDIRS)
68
subdirs: $(SUBDIRS)
79
$(SUBDIRS):

ninfl/standalone-adjective.fst

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
%
2+
% Although the grammar does not define it, Malayalam commonly uses adjective forms as standalone
3+
% words. For example, ചുക്കു കാപ്പി, മലപ്പുറത്തു നിന്നും, സന്തോഷിനു, മാഷു പറഞ്ഞു...
4+
%
5+
6+
#include "../symbols.fst"
7+
8+
ALPHABET = [#Letters##POS##BM##TMP##Lsym##Numbers##compounds#] <del> <pl> <infl_marker>
9+
10+
#inflboundary# = #POS##BM##TMP#
11+
12+
$pseudo-samvruthokaram-to-u$ = {[#Virama#]}:{} ^-> ([#Letters#]+ __ [#inflboundary#]+ <adj> )
13+
14+
"<adjective.a>" || $pseudo-samvruthokaram-to-u$

noun.fst

+5-5
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,6 @@ $DERIVEDNOUNS$ = (($VSTEM$ <n> <deriv> ) || $NOUNFROMVERB$) |\
2525
( "<verb-adverbs.a>" <n> <deriv> || $NOUNFROMADVERB$ ) |\
2626
( ($NSTEM$ [<masculine><feminine><neutral>] <n> <deriv>) || $NOUNFROMNOUN$)
2727

28-
$ENDS_WITH_ANUSWARA_FILTER$ = [#Letters#]+[<n><np><RB>]+
29-
$ADJ_CANDIDATES$ = ( $NSTEM$ | $PROPERNOUN$ ) || $ENDS_WITH_ANUSWARA_FILTER$
30-
$ADJ_PART$ = ( $ADJ_CANDIDATES$ <adj>) || "<ninfl/adjective.a>"
31-
3228
$COMPOUND_NSTEM$ = ( ( ( $NSTEM$ | $PROPERNOUN$ ) <adj>)* ( $NSTEM$ | $PROPERNOUN$ ) ) |\ % വിശേഷണവിശേഷ്യങ്ങൾ
3329
( $NSTEM$ <coordinative> $NSTEM$ ) % ദ്വന്ദസമാസം - ആനകുതിര, അച്ഛനമ്മ..
3430
$SINGULAR_NOUN$ = $COMPOUND_NSTEM$ | $PRONOUN$ | $ABBREV$ | $BORROWED$ | $DERIVEDNOUNS$
@@ -38,11 +34,15 @@ $SUFFIXES$ = $POSTPOSITIONS$ | $CONJUNCTION$ | $POLARITY$
3834
$SUFFIXES$ = $SUFFIXES$? $SUFFIXES$ % One or two suffixes (at most 2)
3935

4036
$NOUN$ = $DEM$ |\
41-
$ADJ_PART$+ |\ % Words ending with adjectives alone. തീരദേശ, മലയാള, സമുദ്ര etc
4237
( $DEM$? $ADJECTIVE$? ( $SINGULAR_NOUN$ | ( $PLURAL_NOUN$ <EoW><RB> ) ) [#ninfl#]? $SUFFIXES$? )
4338

4439
$NOUN$ = $NOUN$ || $NINFL$
4540

41+
$ENDS_WITH_ANUSWARA_FILTER$ = [#AAsym#]+ [#Letters#]+ [ം്] [#POS##Numbers##infl##TMP##BM#]+ % NOTE(review): despite the name, the final-letter class admits virama as well as anuswara
42+
$ADJ_CANDIDATES$ = $NOUN$ || $ENDS_WITH_ANUSWARA_FILTER$
43+
$ADJ_PART$ = ( $ADJ_CANDIDATES$ <adj>) || "<ninfl/standalone-adjective.a>"
44+
$NOUN$ = $NOUN$ | $ADJ_PART$ % Also accept standalone adjective forms; added after the $NINFL$ composition above, so they are not passed through $NINFL$ again
45+
4646
% $test$ = വഴി<n><RB><locative>കൂടി<cnj><RB> | വഴി<n><RB><locative>
4747
% $ $test$ || $NOUN$ >> "noun.test.a"
4848

0 commit comments

Comments
 (0)