%!PS-Adobe-2.0 %%Creator: dvipsk 5.58f Copyright 1986, 1994 Radical Eye Software %%Title: paper.dvi %%Pages: 8 %%PageOrder: Ascend %%BoundingBox: 0 0 596 842 %%DocumentFonts: Times-Bold Times-Roman Courier Times-Italic %%DocumentPaperSizes: a4 %%EndComments %DVIPSCommandLine: dvips paper.dvi %DVIPSParameters: dpi=300, compressed, comments removed %DVIPSSource: TeX output 2000.03.10:1510 %%BeginProcSet: texc.pro /TeXDict 250 dict def TeXDict begin /N{def}def /B{bind def}N /S{exch}N /X{S N}B /TR{translate}N /isls false N /vsize 11 72 mul N /hsize 8.5 72 mul N /landplus90{false}def /@rigin{isls{[0 landplus90{1 -1}{-1 1} ifelse 0 0 0]concat}if 72 Resolution div 72 VResolution div neg scale isls{landplus90{VResolution 72 div vsize mul 0 exch}{Resolution -72 div hsize mul 0}ifelse TR}if Resolution VResolution vsize -72 div 1 add mul TR[matrix currentmatrix{dup dup round sub abs 0.00001 lt{round}if} forall round exch round exch]setmatrix}N /@landscape{/isls true N}B /@manualfeed{statusdict /manualfeed true put}B /@copies{/#copies X}B /FMat[1 0 0 -1 0 0]N /FBB[0 0 0 0]N /nn 0 N /IE 0 N /ctr 0 N /df-tail{ /nn 8 dict N nn begin /FontType 3 N /FontMatrix fntrx N /FontBBox FBB N string /base X array /BitMaps X /BuildChar{CharBuilder}N /Encoding IE N end dup{/foo setfont}2 array copy cvx N load 0 nn put /ctr 0 N[}B /df{ /sf 1 N /fntrx FMat N df-tail}B /dfs{div /sf X /fntrx[sf 0 0 sf neg 0 0] N df-tail}B /E{pop nn dup definefont setfont}B /ch-width{ch-data dup length 5 sub get}B /ch-height{ch-data dup length 4 sub get}B /ch-xoff{ 128 ch-data dup length 3 sub get sub}B /ch-yoff{ch-data dup length 2 sub get 127 sub}B /ch-dx{ch-data dup length 1 sub get}B /ch-image{ch-data dup type /stringtype ne{ctr get /ctr ctr 1 add N}if}B /id 0 N /rw 0 N /rc 0 N /gp 0 N /cp 0 N /G 0 N /sf 0 N /CharBuilder{save 3 1 roll S dup /base get 2 index get S /BitMaps get S get /ch-data X pop /ctr 0 N ch-dx 0 ch-xoff ch-yoff ch-height sub ch-xoff ch-width add ch-yoff setcachedevice ch-width ch-height true[1 0 0 -1 -.1 ch-xoff sub ch-yoff .1 sub]/id ch-image N /rw ch-width 7 add 8 idiv string N /rc 0 N /gp 0 N /cp 0 N{rc 0 ne{rc 1 sub /rc X rw}{G}ifelse}imagemask restore}B /G{{id gp get /gp gp 1 add N dup 18 mod S 18 idiv pl S get exec}loop}B /adv{cp add /cp X}B /chg{rw cp id gp 4 index getinterval putinterval dup gp add /gp X adv}B /nd{/cp 0 N rw exit}B /lsh{rw cp 2 copy get dup 0 eq{pop 1}{ dup 255 eq{pop 254}{dup dup add 255 and S 1 and or}ifelse}ifelse put 1 adv}B /rsh{rw cp 2 copy get dup 0 eq{pop 128}{dup 255 eq{pop 127}{dup 2 idiv S 128 and or}ifelse}ifelse put 1 adv}B /clr{rw cp 2 index string putinterval adv}B /set{rw cp fillstr 0 4 index getinterval putinterval adv}B /fillstr 18 string 0 1 17{2 copy 255 put pop}for N /pl[{adv 1 chg} {adv 1 chg nd}{1 add chg}{1 add chg nd}{adv lsh}{adv lsh nd}{adv rsh}{ adv rsh nd}{1 add adv}{/rc X nd}{1 add set}{1 add clr}{adv 2 chg}{adv 2 chg nd}{pop nd}]dup{bind pop}forall N /D{/cc X dup type /stringtype ne{] }if nn /base get cc ctr put nn /BitMaps get S ctr S sf 1 ne{dup dup length 1 sub dup 2 index S get sf div put}if put /ctr ctr 1 add N}B /I{ cc 1 add D}B /bop{userdict /bop-hook known{bop-hook}if /SI save N @rigin 0 0 moveto /V matrix currentmatrix dup 1 get dup mul exch 0 get dup mul add .99 lt{/QV}{/RV}ifelse load def pop pop}N /eop{SI restore userdict /eop-hook known{eop-hook}if showpage}N /@start{userdict /start-hook known{start-hook}if pop /VResolution X /Resolution X 1000 div /DVImag X /IE 256 array N 0 1 255{IE S 1 string dup 0 3 index put cvn put}for 65781.76 div /vsize X 65781.76 div /hsize X}N /p{show}N /RMat[1 0 0 -1 0 0]N /BDot 260 string N /rulex 0 N /ruley 0 N /v{/ruley X /rulex X V}B /V {}B /RV statusdict begin /product where{pop product dup length 7 ge{0 7 getinterval dup(Display)eq exch 0 4 getinterval(NeXT)eq or}{pop false} ifelse}{false}ifelse end{{gsave TR -.1 .1 TR 1 1 scale rulex ruley false RMat{BDot}imagemask grestore}}{{gsave TR -.1 .1 TR rulex ruley scale 1 1 false RMat{BDot}imagemask grestore}}ifelse B /QV{gsave newpath transform round exch round exch itransform moveto rulex 0 rlineto 0 ruley neg rlineto rulex neg 0 rlineto fill grestore}B /a{moveto}B /delta 0 N /tail {dup /delta X 0 rmoveto}B /M{S p delta add tail}B /b{S p tail}B /c{-4 M} B /d{-3 M}B /e{-2 M}B /f{-1 M}B /g{0 M}B /h{1 M}B /i{2 M}B /j{3 M}B /k{ 4 M}B /w{0 rmoveto}B /l{p -4 w}B /m{p -3 w}B /n{p -2 w}B /o{p -1 w}B /q{ p 1 w}B /r{p 2 w}B /s{p 3 w}B /t{p 4 w}B /x{0 S rmoveto}B /y{3 2 roll p a}B /bos{/SS save N}B /eos{SS restore}B end %%EndProcSet %%BeginFont: Times-Bold % @@psencodingfile@{ % author = "S. Rahtz, P. MacKay, Alan Jeffrey, B. Horn, K. Berry", % version = "0.6", % date = "22 June 1996", % filename = "8r.enc", % email = "kb@@mail.tug.org", % address = "135 Center Hill Rd. // Plymouth, MA 02360", % codetable = "ISO/ASCII", % checksum = "119 662 4424", % docstring = "Encoding for TrueType or Type 1 fonts to be used with TeX." % @} % % Idea is to have all the characters normally included in Type 1 fonts % available for typesetting. This is effectively the characters in Adobe % Standard Encoding + ISO Latin 1 + extra characters from Lucida. % % Character code assignments were made as follows: % % (1) the Windows ANSI characters are almost all in their Windows ANSI % positions, because some Windows users cannot easily reencode the % fonts, and it makes no difference on other systems. The only Windows % ANSI characters not available are those that make no sense for % typesetting -- rubout (127 decimal), nobreakspace (160), softhyphen % (173). quotesingle and grave are moved just because it's such an % irritation not having them in TeX positions. % % (2) Remaining characters are assigned arbitrarily to the lower part % of the range, avoiding 0, 10 and 13 in case we meet dumb software. % % (3) Y&Y Lucida Bright includes some extra text characters; in the % hopes that other PostScript fonts, perhaps created for public % consumption, will include them, they are included starting at 0x12. % % (4) Remaining positions left undefined are for use in (hopefully) % upward-compatible revisions, if someday more characters are generally % available. % % (5) hyphen appears twice for compatibility with both ASCII and Windows. % /TeXBase1Encoding [ % 0x00 (encoded characters from Adobe Standard not in Windows 3.1) /.notdef /dotaccent /fi /fl /fraction /hungarumlaut /Lslash /lslash /ogonek /ring /.notdef /breve /minus /.notdef % These are the only two remaining unencoded characters, so may as % well include them. /Zcaron /zcaron % 0x10 /caron /dotlessi % (unusual TeX characters available in, e.g., Lucida Bright) /dotlessj /ff /ffi /ffl /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef % very contentious; it's so painful not having quoteleft and quoteright % at 96 and 145 that we move the things normally found there down to here. /grave /quotesingle % 0x20 (ASCII begins) /space /exclam /quotedbl /numbersign /dollar /percent /ampersand /quoteright /parenleft /parenright /asterisk /plus /comma /hyphen /period /slash % 0x30 /zero /one /two /three /four /five /six /seven /eight /nine /colon /semicolon /less /equal /greater /question % 0x40 /at /A /B /C /D /E /F /G /H /I /J /K /L /M /N /O % 0x50 /P /Q /R /S /T /U /V /W /X /Y /Z /bracketleft /backslash /bracketright /asciicircum /underscore % 0x60 /quoteleft /a /b /c /d /e /f /g /h /i /j /k /l /m /n /o % 0x70 /p /q /r /s /t /u /v /w /x /y /z /braceleft /bar /braceright /asciitilde /.notdef % rubout; ASCII ends % 0x80 /.notdef /.notdef /quotesinglbase /florin /quotedblbase /ellipsis /dagger /daggerdbl /circumflex /perthousand /Scaron /guilsinglleft /OE /.notdef /.notdef /.notdef % 0x90 /.notdef /.notdef /.notdef /quotedblleft /quotedblright /bullet /endash /emdash /tilde /trademark /scaron /guilsinglright /oe /.notdef /.notdef /Ydieresis % 0xA0 /.notdef % nobreakspace /exclamdown /cent /sterling /currency /yen /brokenbar /section /dieresis /copyright /ordfeminine /guillemotleft /logicalnot /hyphen % Y&Y (also at 45); Windows' softhyphen /registered /macron % 0xD0 /degree /plusminus /twosuperior /threesuperior /acute /mu /paragraph /periodcentered /cedilla /onesuperior /ordmasculine /guillemotright /onequarter /onehalf /threequarters /questiondown % 0xC0 /Agrave /Aacute /Acircumflex /Atilde /Adieresis /Aring /AE /Ccedilla /Egrave /Eacute /Ecircumflex /Edieresis /Igrave /Iacute /Icircumflex /Idieresis % 0xD0 /Eth /Ntilde /Ograve /Oacute /Ocircumflex /Otilde /Odieresis /multiply /Oslash /Ugrave /Uacute /Ucircumflex /Udieresis /Yacute /Thorn /germandbls % 0xE0 /agrave /aacute /acircumflex /atilde /adieresis /aring /ae /ccedilla /egrave /eacute /ecircumflex /edieresis /igrave /iacute /icircumflex /idieresis % 0xF0 /eth /ntilde /ograve /oacute /ocircumflex /otilde /odieresis /divide /oslash /ugrave /uacute /ucircumflex /udieresis /yacute /thorn /ydieresis ] def %%EndFont %%BeginProcSet: texps.pro TeXDict begin /rf{findfont dup length 1 add dict begin{1 index /FID ne 2 index /UniqueID ne and{def}{pop pop}ifelse}forall[1 index 0 6 -1 roll exec 0 exch 5 -1 roll VResolution Resolution div mul neg 0 0]/Metrics exch def dict begin Encoding{exch dup type /integertype ne{pop pop 1 sub dup 0 le{pop}{[}ifelse}{FontMatrix 0 get div Metrics 0 get div def} ifelse}forall Metrics /Metrics currentdict end def[2 index currentdict end definefont 3 -1 roll makefont /setfont load]cvx def}def /ObliqueSlant{dup sin S cos div neg}B /SlantFont{4 index mul add}def /ExtendFont{3 -1 roll mul exch}def /ReEncodeFont{/Encoding exch def}def end %%EndProcSet TeXDict begin 39158280 55380996 1000 300 300 (paper.dvi) @start /Fa 2 117 df<121E12065AA45AEA19E0EA1E30121812381230A3EA6060136413 C413C812C013700E147E9313>104 D<1204120CA35AEAFF80EA1800A25AA45A1261A212 621264123809127F910D>116 D E /Fb 1 7 df<1306ADB612E0A2D80006C7FCABB612E0 A21B1C7E9A20>6 D E /Fc 134[18 18 28 18 21 12 16 16 1[21 21 21 30 12 18 12 12 21 21 12 18 21 18 21 21 9[35 25 30 23 21 25 1[25 30 28 35 23 1[18 14 1[30 1[25 30 28 1[25 6[14 21 21 21 21 21 1[21 21 21 21 12 10 14 10 4[14 36[21 2[{ TeXBase1Encoding ReEncodeFont }58 41.666669 /Times-Italic rf /Fd 139[14 16 13[18 23 2[21 39[21 21 21 21 21 21 21 21 21 21 1[10 46[{ TeXBase1Encoding ReEncodeFont }16 41.666669 /Times-Bold rf /Fe 1 51 df<121FEA6180EA40C0EA806012C01200A213 C0EA0180EA030012065AEA10201220EA7FC012FF0B107F8F0F>50 D E /Ff 1 32 df<001E131000271320EA478000031340EBC0803801C10013C213E4EA00 E813F0A21370A213F0EA0178EA02381204EA083CEA101CA238201E2038400F40388003C0 14177F8F18>31 D E /Fg 107[17 17 25[19 19 27 19 19 10 15 12 1[19 19 19 29 10 19 1[10 19 19 12 17 19 17 19 17 9[35 3[21 25 6[27 1[12 1[27 21 1[27 2[27 7[19 2[19 1[19 2[19 19 1[9 12 9 2[12 12 1[29 35[21 2[{ TeXBase1Encoding ReEncodeFont } 46 37.500000 /Times-Roman rf /Fh 204[12 12 12 49[{ TeXBase1Encoding ReEncodeFont }3 25.000001 /Times-Roman rf /Fi 204[17 17 17 49[{ TeXBase1Encoding ReEncodeFont }3 33.333334 /Times-Roman rf /Fj 11 62 df37 D48 D<13801203120F12F31203B3A6EA07C0EAFFFE0F1E7C9D17>III<1306A2 130EA2131E132EA2134E138EA2EA010E1202A212041208A212101220A2124012C0B512F0 38000E00A7EBFFE0141E7F9D17>II<137CEA0182EA0701380E0380EA0C0712183838030090C7FC12 781270A2EAF1F0EAF21CEAF406EAF807EB0380A200F013C0A51270A214801238EB070012 18EA0C0E6C5AEA01F0121F7E9D17>I<1240387FFFE014C0A23840008038800100A21302 485AA25B5BA25BA21360A213E05B1201A41203A76C5A131F7E9D17>I57 D61 D E /Fk 173[24 11[20 22 69[{ TeXBase1Encoding ReEncodeFont }3 36.499976 /Times-Roman rf /Fl 7 113 df<00061370380701F8380E02F0EB0C60EB10005B485A 5B001FC7FC13F8EA381E1307A2EB038438700708A3EB031012E0386001E016147E931A> 20 D<127012F8A3127005057C840D>58 D<15C01403EC0F00143C1470495AEB0780011E C7FC1378EA01E0EA0380000FC8FC123C12F0A2123C120FEA0380EA01E0EA0078131EEB07 80EB01E0EB0070143C140FEC03C014001A1C7C9823>60 D<12C012F0123C120FEA0380EA 01E0EA0078131EEB0780EB01E0EB0070143C140FEC03C0A2EC0F00143C1470495AEB0780 011EC7FC1378EA01E0EA0380000FC8FC123C12F012C01A1C7C9823>62 D<027F1380903803C0C190390E0023000138131749130F5B48481306485A48C7FC5A000E 1404121E4891C7FCA25AA45AA400701420A35D6C5CA26C49C7FC6C13066C13183801C060 38007F8021217F9F21>67 D<381E0780382318C0EBA0603843C0701380A2388700E01207 A3380E01C0A3EB0382001C1384EB07041408130300381310381801E017147E931B>110 D<3803C1E038046218EB741CEA0878EB701EA2EA10E01200A33801C03CA3143838038078 147014E0EBC1C038072380EB1E0090C7FCA2120EA45AA2EAFFC0171D819317>112 D E /Fm 2 63 df<126012F0A2126004047C830C>58 D<12C012F0123C120FEA03C0EA00 F0133C130E6D7EEB01E0EB0078141EEC0780A2EC1E001478EB01E0EB0780010EC7FC133C 13F0EA03C0000FC8FC123C12F012C0191A7D9620>62 D E /Fn 2 62 df<000F14C038188001393060038038703807396027FF0038E01006140E5C14181438 14301470386020E000705BEA30416C485A390F0303C0390007062090380E0C1090380C1C 08EB1C18903838380413301370136013E0EA01C013800003EB18083807001C0006EB0C10 000EEB0620000CEB03C01E217E9E23>37 D61 D E /Fo 75[14 11[14 17[21 1[18 18 24[18 21 21 30 21 21 12 16 14 21 21 21 21 32 12 21 12 12 21 21 14 18 21 18 21 18 7[30 1[39 30 30 25 23 28 30 23 30 30 37 25 30 16 14 30 30 23 25 30 28 28 30 1[18 1[23 2[12 21 21 21 21 21 21 21 21 21 21 12 10 14 10 23 1[14 14 1[32 35 34[23 2[{ TeXBase1Encoding ReEncodeFont }78 41.666669 /Times-Roman rf /Fp 133[20 2[33 23 25 15 18 20 1[25 23 25 38 13 25 1[13 25 23 15 20 25 20 1[23 9[45 2[30 1[33 1[28 35 33 4[18 35 1[28 2[33 30 33 1[23 4[15 5[23 1[23 23 2[11 15 42[25 2[{ TeXBase1Encoding ReEncodeFont }41 45.624988 /Times-Bold rf /Fq 138[23 13 18 18 23 23 23 23 33 13 2[13 23 23 13 20 23 20 23 23 29[30 8[15 11[11 15 7[38 37[{ TeXBase1Encoding ReEncodeFont }24 45.624988 /Times-Italic rf /Fr 105[23 1[20 20 24[20 23 23 33 23 23 13 18 15 23 23 23 23 35 13 23 13 13 23 23 15 20 23 20 23 20 7[33 1[43 33 33 28 25 30 33 25 33 33 40 28 33 18 15 33 33 25 28 33 30 30 33 1[20 3[13 13 23 23 23 23 23 23 23 23 23 23 13 11 15 11 2[15 15 15 35 38 33[25 25 2[{ TeXBase1Encoding ReEncodeFont }77 45.624988 /Times-Roman rf /Fs 136[25 25 25 25 25 25 1[25 25 25 25 25 25 1[25 25 25 1[25 25 25 25 25 13[25 5[25 12[25 5[25 10[25 25 25 45[{ TeXBase1Encoding ReEncodeFont }27 41.666669 /Courier rf /Ft 32[25 54[17 46[25 1[36 25 25 14 19 17 1[25 25 25 39 14 25 1[14 25 25 17 22 25 22 25 22 9[47 36 36 1[28 2[28 2[44 30 36 1[17 36 36 1[30 2[33 36 7[25 25 25 25 25 25 25 1[25 2[12 17 12 44[{ TeXBase1Encoding ReEncodeFont }49 50.000001 /Times-Roman rf /Fu 136[36 1[28 17 19 22 1[28 25 28 1[14 28 1[14 28 1[17 22 28 22 28 25 9[50 2[33 28 36 1[30 2[47 3[19 1[39 30 1[36 2[36 11[25 25 25 25 25 3[17 45[{ TeXBase1Encoding ReEncodeFont }35 50.000001 /Times-Bold rf /Fv 133[27 5[20 23 27 1[33 30 33 50 17 2[17 2[20 27 33 27 33 30 16[37 2[56 3[23 1[47 5[43 19[20 45[{ TeXBase1Encoding ReEncodeFont }22 59.999974 /Times-Bold rf end %%EndProlog %%BeginSetup %%Feature: *Resolution 300dpi TeXDict begin %%PaperSize: a4 %%BeginPaperSize: a4 a4 %%EndPaperSize %%EndSetup %%Page: 1 1 1 0 bop 161 79 a Fv(A)15 b(Pr)o(obabilistic)d(Genr)o(e-Independent)h (Model)i(of)g(Pr)o(onominalizati)o(on)392 193 y Fu(Michael)d(Strube)194 247 y Ft(European)g(Media)h(Laboratory)f(GmbH)433 300 y(V)m(illa)g(Bosch)260 354 y(Schlo\337-W)l(olfsbrunnenwe)o(g)e(33)267 408 y(69118)i(Heidelber)o(g,)g(German)o(y)139 462 y Fs (Michael.Strube@eml.villa-bosch.de)1245 193 y Fu(Maria)g(W)l(olters)938 247 y Ft(Inst.)h(f.)f(K)n(ommunikationsforschung)f(u.)i(Phonetik)1230 300 y(Uni)o(v)o(ersit)s(\250)-20 b(at)12 b(Bonn)1169 354 y(Poppelsdorfer)f(Allee)h(47)1173 408 y(53115)g(Bonn,)h(German)o(y) 1114 462 y Fs(wolters@ikp.uni-bonn.de)376 646 y Fu(Abstract)0 712 y Fr(Our)34 b(aim)g(in)g(this)f(paper)h(is)f(to)h(identify)e (genre-)0 764 y(independent)17 b(factors)i(that)f(in\003uence)h(the)f (decision)g(to)0 817 y(pronominalize.)k(Results)14 b(based)f(on)h(the)g (annotation)f(of)0 869 y(twelv)o(e)g(te)o(xts)f(from)h(four)g(genres)g (sho)o(w)f(that)g(only)g(a)i(fe)o(w)0 921 y(factors)j(ha)o(v)o(e)h(a)g Fq(str)n(ong)d Fr(in\003uence)j(on)f(pronominaliza-)0 973 y(tion)d(across)h(genres,)h(i.e.)f(distance)f(from)i(last)e (mention,)0 1026 y(agreement,)g(and)e(form)h(of)f(the)g(antecedent.)17 b(Finally)m(,)c(we)0 1078 y(describe)d(a)h(probabilistic)d(model)j(of)f (pronominalization)0 1130 y(deri)o(v)o(ed)h(from)h(our)f(data.)0 1222 y Fu(1)50 b(Intr)o(oduction)0 1288 y Fr(Generating)11 b(adequate)h(referring)h(e)o(xpressions)e(is)h(an)g(ac-)0 1340 y(ti)o(v)o(e)e(research)h(topic)f(in)g(Natural)g(Language)g (Generation.)0 1392 y(Adequate)18 b(referring)i(e)o(xpressions)d(are)j (those)e(that)g(en-)0 1445 y(able)10 b(the)g(user)g(to)f(quickly)g(and) h(unambiguously)d(identify)0 1497 y(the)12 b(discourse)f(entity)g(that) h(the)g(e)o(xpression)f(co-speci\002es)0 1549 y(with.)16 b(In)c(this)f(paper)n(,)j(we)e(concentrate)g(on)f(an)i(important)0 1602 y(aspect)h(of)f(that)h(question,)f(which)g(has)g(recei)o(v)o(ed)h (less)g(at-)0 1654 y(tention)f(than)h(the)g(question)e(of)j(anaphora)f (resolution)e(in)0 1706 y(discourse)i(interpretation,)h(i.e.,)j(when)d (is)g(it)g(feasible)g(to)0 1759 y(pronominalize?)45 1811 y(Our)f(aim)g(is)f(to)h(identify)e(the)h(central)h(factors)f(that)g (in-)0 1863 y(\003uence)g(pronominalization)d(across)j(genres.)19 b(Section)12 b(2)0 1915 y(moti)o(v)o(ates)e(and)h(presents)f(the)h (factors)f(that)h(were)h(in)n(v)o(esti-)0 1968 y(gated)f(in)f(this)g (study:)j(distance)d(from)i(last)e(mention,)h(par)o(-)0 2020 y(allelism,)16 b(ambiguity)m(,)f(syntactic)f(function,)h (agreement,)0 2072 y(sortal)9 b(class,)h(syntactic)f(function)f(of)i (the)g(antecedent)f(and)0 2125 y(form)14 b(of)g(the)g(antecedent.)22 b(Our)14 b(analyses)f(are)h(based)g(on)0 2177 y(a)g(corpus)g(of)g (twelv)o(e)g(te)o(xts)f(from)i(four)f(dif)o(ferent)g(genres)0 2229 y(with)i(a)h(total)f(of)h(more)h(than)e(24,000)g(words)g(and)h (7126)0 2282 y(referring)j(e)o(xpressions)e(\(Section)i(3\).)39 b(The)20 b(results)f(of)0 2334 y(the)e(statistical)f(analyses)h(are)h (summarized)g(in)f(Section)0 2386 y(4.)j(There)13 b(are)g(strong)f (statistical)f(associations)g(between)0 2438 y(each)20 b(of)f(the)g(factors)g(and)g(pronominalization.)37 b(Only)0 2491 y(when)15 b(we)g(combine)g(them)g(into)f(a)h(probabilistic)e (model)0 2543 y(we)21 b(can)f(identify)f(those)h(factors)f(whose)h (contrib)o(ution)0 2595 y(is)13 b(really)h(important,)g(i.e.)g (distance)f(from)i(last)e(mention,)0 2648 y(agreement,)j(and)e(to)g(a)h (certain)f(de)o(gree)h(form)g(of)f(the)g(an-)0 2700 y(tecedent.)24 b(Since)14 b(these)g(factors)g(can)h(be)f(annotated)f(rel-)1013 646 y(ati)o(v)o(ely)j(cheaply)m(,)i(we)g(conclude)e(that)h(it)g(is)f (possible)g(to)1013 698 y(de)o(v)o(elop)11 b(reasonable)g(statistical)f (pronominalization)f(al-)1013 750 y(gorithms.)1013 843 y Fu(2)49 b(F)o(actors)13 b(in)f(Pr)o(onoun)g(Generation)1013 910 y Fp(2.1)45 b(Pr)o(e)o(vious)11 b(W)m(ork)1013 974 y Fr(Lately)m(,)j(a)h(number)g(of)f(researchers)h(ha)o(v)o(e)g(done)f (corpus-)1013 1026 y(based)g(work)f(on)h(NP)h(generation)e(and)i (pronoun)e(resolu-)1013 1079 y(tion,)8 b(and)h(a)h(number)f(of)h (studies)d(ha)o(v)o(e)j(found)e(dif)o(ferences)1013 1131 y(in)h(the)h(frequenc)o(y)g(of)g(both)f(personal)g(and)h(demonstrati)o (v)o(e)1013 1183 y(pronouns)16 b(across)j(genres.)36 b(Ho)o(we)o(v)o(er)n(,)21 b(none)d(of)h(these)1013 1235 y(studies)14 b(compares)j(the)f(in\003uence)g(of)g(dif)o(ferent)g (factors)1013 1288 y(on)10 b(pronoun)g Fq(gener)o(ation)f Fr(across)i(genres.)1058 1340 y(Recently)m(,)17 b(Poesio)e(et)h(al.)28 b(\(1999\))15 b(ha)o(v)o(e)h(described)f(a)1013 1392 y(corpus-based)c(approach)h(to)g(statistical)e(NP)j(generation.)1013 1445 y(While)j(the)o(y)i(ask)f(the)g(same)h(question)e(as)h(pre)o (vious)g(re-)1013 1497 y(searchers)i(\(e.g.)h(Dale)f(\(1992\)\),)h (their)f(methods)f(dif)o(fer)1013 1549 y(from)i(traditional)e(work)h (on)h(NP)g(generation.)40 b(Poesio)1013 1602 y(et)15 b(al.)28 b(\(1999\))15 b(use)h(two)f(kinds)f(of)i(factors:)22 b(\(1\))16 b(factors)1013 1654 y(related)9 b(to)h(the)g(NP)g(under)g (consideration)e(such)h(as)h(agree-)1013 1706 y(ment)16 b(information,)i(semantic)e(factors,)i(and)e(discourse)1013 1759 y(factors,)21 b(and)e(\(2\))h(factors)f(related)h(to)f(the)g (antecedent,)1013 1811 y(such)8 b(as)h(animac)o(y)m(,)i(clause)e(type,) g(thematic)g(role,)h(proxim-)1013 1863 y(ity)m(,)e(etc.)14 b(Poesio)8 b(et)g(al.)14 b(\(1999\))7 b(report)i(that)f(the)o(y)g(were) h(not)1013 1915 y(able)k(to)g(annotate)f(man)o(y)i(of)f(these)g (factors)g(reliably)m(.)19 b(On)1013 1968 y(the)12 b(basis)h(of)g (these)f(annotations,)g(the)o(y)h(constructed)f(de-)1013 2020 y(cision)c(trees)j(for)f(predicting)e(surface)i(forms)h(of)f (referring)1013 2072 y(e)o(xpressions)j(based)i(on)g(these)g(factors)f (-)i(with)e(good)h(re-)1013 2125 y(sults:)e(all)e(28)h(personal)f (pronouns)f(in)h(their)h(corpus)f(were)1013 2177 y(generated)d (correctly)m(.)13 b(Unfortunately)m(,)8 b(the)o(y)g(do)h(not)e(e)o(v)o (al-)1013 2229 y(uate)k(the)g(contrib)o(ution)e(of)i(each)h(of)g(these) f(factors,)g(so)g(we)1013 2282 y(do)f(not)h(kno)o(w)f(which)g(ones)h (are)h(important.)1058 2334 y(W)l(ork)20 b(on)f(corpus-based)g (approaches)h(to)f(anaphora)1013 2386 y(resolution)f(is)i(more)i (numerous.)42 b(Ge)21 b(et)g(al.)43 b(\(1998\))1013 2438 y(describe)12 b(a)i(supervised)d(probabilistic)g(pronoun)h(resolu-)1013 2491 y(tion)h(algorithm)g(which)h(is)g(based)g(on)g(complete)g(syntac-) 1013 2543 y(tic)i(information.)29 b(The)17 b(factors)f(the)o(y)h(use)f (include)g(dis-)1013 2595 y(tance)9 b(from)i(last)e(mention,)h (syntactic)e(function)h(and)g(con-)1013 2648 y(te)o(xt,)16 b(agreement)g(information,)g(animac)o(y)g(of)g(the)f(refer)o(-)1013 2700 y(ent,)j(a)g(simpli\002ed)f(notion)e(of)j(selectional)e (restrictions,)p eop %%Page: 2 2 2 1 bop 0 2 968 2 v 25 34 a Fo(Agree)119 b(Agreement)11 b(in)e(person,)i(gender)n(,)g(and)f(number)25 81 y(Syn)155 b(Syntactic)10 b(function)25 128 y(Class)130 b(Sortal)9 b(Class)i(\(cf.)i(T)m(ab)n(.)h(2\))25 175 y(SynAnte)74 b(Syntactic)10 b(function)e(of)i(antecedent.)245 222 y(\223F\224)h(for)f(\002rst)g(mention,)g(\223N\224)h(for)f(deadend)25 269 y(F)o(ormAnte)50 b(F)o(orm)10 b(of)g(antecedent)h(\(pers.)g(pron.,) f(poss.)245 316 y(pron.,)g(def.)j(NP)-5 b(,)11 b(indef.)i(NP)-5 b(,)11 b(proper)f(name\))25 363 y(Dist)150 b(Distance)10 b(to)g(last)g(mention)g(in)f(units)25 410 y(Dist4)129 b(Dist)9 b(reduced)i(to)f(4)g(v)o(alues)g(\(deadend,)245 457 y(Dist)p Fn(=)p Fo(0,)g(Dist)p Fn(=)p Fo(1,)g(Dist)p Fm(>)p Fn(=)p Fo(2\))25 504 y(P)o(ar)166 b(P)o(arallelism)10 b(\(Syn=SynAnte\))25 550 y(Ambig)104 b(Number)10 b(of)g(competing)g (discourse)g(entities)p 0 566 V 208 664 a Fr(T)l(able)h(1:)i(Ov)o (ervie)o(w)e(of)g(factors)0 776 y(and)k(the)f(length)g(of)h(the)g (coreference)h(chain.)25 b(Cardie)15 b(&)0 829 y(W)l(agstaf)o(f)9 b(\(1999\))e(describe)i(an)f(unsupervised)f(algorithm)0 881 y(for)15 b(noun)f(phrase)h(coreference)h(resolution.)24 b(Their)14 b(fac-)0 933 y(tors)8 b(are)i(taken)e(from)i(Ge)f(et)g(al.)k (\(1998\),)c(with)f(two)g(e)o(xcep-)0 986 y(tions.)25 b(First,)16 b(the)o(y)f(replace)g(complete)g(syntactic)f(infor)o(-)0 1038 y(mation)d(with)f(information)h(about)f(NP)i(bracketing.)i(Sec-)0 1090 y(ond,)h(the)o(y)e(use)h(the)g(sortal)f(class)h(of)g(the)g (referent)g(which)0 1142 y(the)o(y)e(determine)g(on)f(the)h(basis)f(of) h(W)l(ordNet)f(\(Fellbaum,)0 1195 y(1998\).)45 1247 y(There)16 b(has)f(been)h(no)f(comparison)g(between)g(corpus-)0 1299 y(based)c(approaches)g(for)h(anaphora)f(resolution)e(and)i(more)0 1352 y(traditional)19 b(algorithms)h(based)h(on)g(focusing)f(\(Sidner)n (,)0 1404 y(1983\))c(or)h(centering)f(\(Grosz)g(et)h(al.,)h(1995\))e(e) o(xcept)h(for)0 1456 y(Azzam)h(et)e(al.)31 b(\(1998\).)g(Ho)o(we)o(v)o (er)n(,)18 b(their)e(comparison)0 1509 y(is)e(\003a)o(wed)i(by)e(e)o(v) o(aluating)f(a)i(syntax-based)f(focus)g(algo-)0 1561 y(rithm)e(on)f(the)h(basis)f(of)h(insuf)o(\002cient)f(syntactic)f (informa-)0 1613 y(tion.)k(F)o(or)e(pronoun)e(generation,)g(the)h (original)f(centering)0 1665 y(model)j(\(Grosz)f(et)h(al.,)h(1995\))e (pro)o(vides)g(a)h(rule)g(which)f(is)0 1718 y(supposed)i(to)h(decide)h (whether)f(a)h(referring)g(e)o(xpression)0 1770 y(has)d(to)f(be)h (realized)g(as)g(a)g(pronoun.)18 b(Ho)o(we)o(v)o(er)n(,)c(this)e(rule)0 1822 y(applies)e(only)f(to)i(the)f(referring)h(e)o(xpression)f(which)g (is)g(the)0 1875 y(backward-looking)j(center)i(\()p Fq(Cb)p Fr(\))h(of)g(the)f(current)g(utter)o(-)0 1927 y(ance.)20 b(W)n(ith)12 b(respect)h(to)g(all)f(other)h(referring)g(e)o(xpression)0 1979 y(in)e(this)f(utterance)h(centering)f(is)h(underspeci\002ed.)45 2032 y(Y)-5 b(eh)22 b(&)f(Mellish)f(\(1997\))h(propose)f(a)h(set)g(of)g (hand-)0 2084 y(crafted)d(rules)f(for)h(the)f(generation)g(of)h (anaphora)f(\(zero)0 2136 y(and)12 b(personal)f(pronouns,)f(full)h (NPs\))h(in)f(Chinese.)16 b(Ho)o(w-)0 2189 y(e)o(v)o(er)n(,)21 b(the)d(factors)g(which)g(appear)g(to)g(be)h(important)e(in)0 2241 y(their)25 b(e)o(v)o(aluation)e(are)j(similar)f(to)g(factors)f (described)0 2293 y(by)d(authors)g(mentioned)f(abo)o(v)o(e:)35 b(distance,)24 b(syntactic)0 2345 y(constraints)16 b(on)i(zero)h (pronouns,)g(discourse)e(structure,)0 2398 y(salience)11 b(and)g(animac)o(y)g(of)h(discourse)e(entities.)0 2479 y Fp(2.2)45 b(Our)12 b(F)o(actors)0 2543 y Fr(The)f(factors)f(we)g(in)n (v)o(estigate)f(in)i(this)e(paper)i(only)f(rely)g(on)0 2595 y(annotations)j(of)j(NPs)f(and)h(their)f(co-speci\002cation)f (rela-)0 2648 y(tions.)e(W)l(e)f(did)e(not)h(add)f(an)o(y)h(discourse)f (structural)g(anno-)0 2700 y(tation,)i(because)h(\(1\))g(the)f(te)o (xts)g(are)h(e)o(xtracts)g(from)g(lar)o(ger)1013 46 y(te)o(xts)f(which) g(are)i(not)e(a)o(v)o(ailable)h(to)f(us,)h(and)g(\(2\))h(we)f(ha)o(v)o (e)1013 98 y(not)c(yet)h(found)f(a)h(labelling)f(scheme)h(for)h (discourse)e(struc-)1013 150 y(ture)i(that)f(has)h(an)g(inter)o(-coder) g(reliability)e(comparable)j(to)1013 203 y(the)f(MUC)i(coreference)h (annotation)c(scheme.)1058 256 y(Based)i(on)f(our)g(re)o(vie)o(w)h(of)f (the)h(literature)e(and)i(rele)o(v)o(ant)1013 308 y(work)f(in)i (linguistics)c(\(for)k(sortal)f(class,)h(mainly)f(Fraurud)1013 360 y(\(1996\))k(and)g(Fellbaum)h(\(1998\)\),)g(we)g(ha)o(v)o(e)g (chosen)f(the)1013 413 y(nine)10 b(factors)h(listed)f(in)h(T)l(able)g (1.)k(Methodologically)m(,)9 b(we)1013 465 y(distinguis)o(h)f(two)i (kinds)g(of)i(factors:)1013 535 y Fp(NP-le)o(vel)41 b(factors)k Fr(are)f(independent)d(from)i(co-)1013 588 y(speci\002cation)32 b(relations.)79 b(The)o(y)34 b(depend)e(on)h(the)1013 640 y(semantics)18 b(of)h(the)g(discourse)f(entity)g(or)h(on)f (discourse)1013 692 y(information)g(supplied)f(for)j(the)f(NP)g (generation)g(algo-)1013 744 y(rithm)e(by)h(the)g(NLG)g(system.)35 b(T)l(ypical)17 b(e)o(xamples)i(are)1013 797 y(NP)12 b(agreement)h(by)e(gender)n(,)i(number)n(,)g(person)e(and)h(case,)1013 849 y(the)19 b(syntactic)f(function)g(of)i(the)f(NP)h(\(subject,)h (object,)1013 901 y(PP)14 b(adjunct,)g(other\),)g(the)f(sortal)g(class) g(of)h(the)f(discourse)1013 954 y(entity)d(to)g(which)h(an)g(NP)h (refers,)g(discourse)e(structure,)h(or)1013 1006 y(topicality)d(of)j (the)g(discourse)e(entities.)k(In)e(this)e(paper)n(,)j(we)1013 1058 y(focus)h(on)h(the)f(\002rst)h(three)g(factors,)h(agreement)f (\(Agree\),)1013 1111 y(syntactic)9 b(function)h(\(Syn\),)i(and)f (sortal)f(class)h(\(Class\).)1058 1164 y(Since)17 b(we)g(are)g(using)f (syntactically)f(annotated)g(data)1013 1216 y(in)d(the)h(Penn)h(T)n (reebank-II)g(format,)g(the)f(syntactic)f(func-)1013 1268 y(tion)h(of)i(an)g(NP)g(was)g(deri)o(v)o(ed)f(from)i(these)e (annotations.)1013 1321 y(Agreement)f(for)h(gender)n(,)g(number)n(,)g (and)g(person)e(was)h(la-)1013 1373 y(belled)e(by)h(hand.)18 b(Since)12 b(English)f(has)h(almost)f(no)h(nomi-)1013 1425 y(nal)e(case)i(morphemes,)g(case)f(was)g(not)f(annotated.)1058 1478 y(Sortal)j(classes)f(pro)o(vide)h(information)f(about)g(the)h (dis-)1013 1530 y(course)i(entity)g(that)g(a)h(referring)h(e)o (xpression)d(e)o(v)o(okes)h(or)1013 1583 y(accesses.)f(The)e(classes,)f (summarized)h(in)f(T)l(able)g(2,)h(were)1013 1635 y(deri)o(v)o(ed)27 b(from)h(EuroW)l(ordNet)e(BaseT)l(ypes)i(\(V)-6 b(ossen,)1013 1687 y(1998\))19 b(and)h(are)h(de\002ned)f(e)o(xtensionally)e(on)i(the) g(basis)1013 1740 y(of)12 b(W)l(ordNet)g(synsets.)17 b(Their)12 b(selection)g(was)f(moti)o(v)o(ated)1013 1792 y(by)i(two)h(main)g(considerations:)k(all)c(classes)f(should)g(oc-)1013 1844 y(cur)g(in)g(all)g(genres,)h(and)g(the)f(number)g(of)h(classes)f (should)1013 1897 y(be)i(as)g(small)f(as)h(possible)e(in)i(order)g(to)f (a)o(v)o(oid)h(problems)1013 1949 y(with)9 b(sparse)i(data.)j(F)o(our)d (classes,)f(State,)h(Ev)o(ent,)g(Action,)1013 2001 y(and)21 b(Property)m(,)j(co)o(v)o(er)e(dif)o(ferent)f(types)g(of)g(situations,) 1013 2053 y(two)d(co)o(v)o(er)j(spatiotemporal)e(characteristics)g(of)h (situa-)1013 2106 y(tions)10 b(\(Loc/T)n(ime\).)15 b(The)c(four)h (remaining)f(classes)f(co)o(v)o(er)1013 2158 y(the)21 b(two)g(dimensions)g(\223concrete)h(vs.)g(abstract)g(\(Con-)1013 2210 y(cept\)\224)10 b(and)g(\223human)h(\(Pers\))g(vs.)f(non-human)f (\(PhysObj\))1013 2263 y(vs.)i(institutio)o(nali)o(sed)d(groups)j(of)g (humans)g(\(Group\)\224.)1058 2316 y(Since)26 b(we)g(are)h(only)e (interested)f(in)i(the)g(decision)1013 2368 y(whether)17 b(to)h(employ)f(pronouns)f(rather)i(than)g(full)f(NPs)1013 2420 y(and)c(less)g(in)g(the)g(form)h(of)g(the)f(NP)g(itself,)h(and)f (since)g(our)1013 2473 y(methodology)c(is)i(based)h(on)f(corpus)g (annotation,)f(we)i(did)1013 2525 y(not)17 b(take)g(into)g(account)h (more)g(formal)h(semantic)f(cate-)1013 2577 y(gories)10 b(such)h(as)g(kinds)f(vs.)h(indi)o(viduals.)1013 2648 y Fp(Co-speci\002cation-le)o(vel)20 b(factors)46 b Fr(depend)21 b(on)g(infor)o(-)1013 2700 y(mation)h(about)h(sequences)g(of)g (referring)h(e)o(xpressions)p eop %%Page: 3 3 3 2 bop 0 2 909 2 v 25 34 a Fo(Person)80 b(one)10 b(or)g(more)h(human)f (beings)25 81 y(Group)86 b(institutio)o(nalized)8 b(group)h(of)h(human) g(beings)25 128 y(PhysObj)49 b(physical)9 b(object)25 175 y(Concept)54 b(abstract)10 b(concept)25 222 y(Loc)129 b(geographical)9 b(location)25 269 y(T)o(ime)107 b(date,)11 b(time)f(span)25 316 y(Ev)o(ent)97 b(sth.)12 b(which)e(takes)g(place)h (in)f(space)i(and)e(time)25 363 y(Action)79 b(sth.)12 b(which)e(is)g(done)25 410 y(State)110 b(state)10 b(of)g(af)o(fairs,)g (feeling,)g Fm(:)d(:)g(:)25 457 y Fo(Property)49 b(characteristic)10 b(or)g(attrib)o(ute)f(of)h(sth.)p 0 472 V 0 549 a Fr(T)l(able)20 b(2:)32 b(Ov)o(ervie)o(w)20 b(of)g(Sortal)g(Classes)g(with)f(rough)0 601 y(characterizations)10 b(of)i(rele)o(v)o(ant)e(synsets)0 730 y(which)i(co-specify)g(with)f(each)i(other)n(.)k(Such)12 b(a)h(sequence)0 782 y(consists)7 b(of)i(all)f(referring)h(e)o (xpressions)f(that)g(e)o(v)o(oke)g(or)h(ac-)0 834 y(cess)i(the)f(same)h (discourse)f(entity)m(.)j(In)e(this)e(paper)n(,)j(we)f(use)0 887 y(the)17 b(follo)o(wing)e(factors)i(from)h(the)f(literature:)25 b(distance)0 939 y(to)17 b(last)g(mention)g(\(Dist)f(and)i(Dist4\),)g (ambiguity)e(\(Am-)0 991 y(big\),)11 b(parallelism)g(\(P)o(ar\),)i (form)f(of)g(the)f(antecedent)g(\(F)o(or)o(-)0 1043 y(mAnte\),)22 b(and)e(syntactic)e(function)g(of)i(the)f(antecedent)0 1096 y(\(SynAnte\).)k(W)l(e)15 b(also)f(distinguish)d(between)j (discourse)0 1148 y(entities)c(that)g(are)i(only)e(e)o(v)o(oked)g (once,)i Fq(deadend)e Fr(entities,)0 1200 y(and)h(entities)f(that)g (are)i(accessed)f(repeatedly)m(.)45 1256 y(P)o(arallelism)19 b(is)g(de\002ned)f(on)h(the)f(basis)g(of)h(syntactic)0 1308 y(function:)h(a)15 b(referring)g(e)o(xpression)f(and)h(its)f (antecedent)0 1360 y(are)d(parallel)e(if)i(the)o(y)f(ha)o(v)o(e)g(the)g (same)h(syntactic)e(function.)45 1416 y(F)o(or)17 b(calculating)d (distance)g(and)h(ambiguity)m(,)h(we)f(se)o(g-)0 1468 y(mented)i(the)g(te)o(xts)g(into)f(major)h(clause)g(units)f(\(MCUs\).)0 1521 y(Each)26 b(MCU)h(consists)d(of)i(a)h(major)f(clause)g Fl(C)j Fr(plus)0 1573 y(an)o(y)13 b(subordinate)e(clauses)h(and)h(an)o (y)g(coordinated)e(major)0 1625 y(clauses)i(whose)g(subject)f(is)h(the) g(same)i(as)e(that)g(of)g Fl(C)k Fr(and)0 1677 y(where)11 b(that)g(subject)f(has)h(been)g(elided.)45 1733 y(Dist)i(pro)o(vides)h (the)f(number)i(of)f(MCUs)g(between)g(the)0 1785 y(current)d(and)g(the) g(last)f(pre)o(vious)g(mention)g(of)h(a)h(discourse)0 1838 y(entity)m(.)23 b(When)14 b(an)g(entity)f(is)h(e)o(v)o(oked)f(for) i(the)f(\002rst)g(time,)0 1890 y(Dist)d(is)h(set)f(to)h(\223D\224.)h (Dist4)e(is)g(deri)o(v)o(ed)h(from)h(Dist)e(by)h(as-)0 1942 y(signing)g(the)i(\002x)o(ed)g(distance)f(2)h(to)g(all)f (referring)h(e)o(xpres-)0 1994 y(sions)d(whose)h(antecedent)g(is)g (more)h(than)g(1)f(MCU)h(a)o(way)m(.)0 2047 y(Ambiguity)c(is)i (de\002ned)g(as)g(the)f(number)h(of)g(all)g(discourse)0 2099 y(entities)e(with)g(the)h(same)h(agreement)g(features)g(that)f (occur)0 2151 y(in)15 b(the)g(pre)o(vious)f(unit)g(or)i(in)f(the)g (same)h(unit)e(before)i(the)0 2204 y(current)11 b(referring)g(e)o (xpression.)0 2310 y Fu(3)50 b(Data)0 2386 y Fr(Our)14 b(data)g(consisted)f(of)i(twelv)o(e)f(\(plus)f(two\))g(te)o(xts)h(from) 0 2438 y(the)19 b(Bro)o(wn)h(corpus)e(and)i(the)f(corresponding)f (part-of-)0 2491 y(speech)23 b(and)h(syntactic)e(annotations)f(from)j (the)g(Penn)0 2543 y(T)n(reebank)c(\(LDC,)h(1995\).)40 b(The)20 b(te)o(xts)g(were)g(selected)0 2595 y(because)14 b(the)o(y)g(contained)f(relati)o(v)o(ely)g(little)f(or)i(no)g(direct)0 2648 y(speech;)9 b(se)o(gments)f(of)h(direct)f(speech)g(pose)h (problems)f(for)0 2700 y(both)13 b(pronoun)f(resolution)g(and)i (generation)f(because)h(of)1013 46 y(the)g(change)h(in)g(point)f(of)h (vie)o(w)m(.)25 b(Morpho-syntactic)14 b(in-)1013 98 y(formation)d(such) g(as)g(markables,)h(part-of-speech)g(labels,)1013 150 y(grammatical)k(role)g(labels,)g(and)g(form)h(of)f(referring)g(e)o(x-) 1013 203 y(pression)c(were)i(automatically)e(e)o(xtracted)i(from)g(the) g(e)o(x-)1013 255 y(isting)9 b(T)n(reebank)i(annotations.)1058 315 y(The)f(te)o(xts)g(come)i(from)f(four)f(dif)o(ferent)g(genres:)j (Popu-)1013 367 y(lar)f(Lore)h(\(CF\),)h(Belles)e(Lettres)g(\(CG\),)i (Fiction/General)1013 420 y(\(CK\),)25 b(and)f(Fiction/Mystery)e (\(CL\).)k(The)e(choice)g(of)1013 472 y(genres)16 b(was)g(dictated)g (by)h(the)g(a)o(v)o(ailability)d(of)j(detailed)1013 524 y(T)n(reebank-II)g(parses.)29 b(T)l(able)17 b(3)f(sho)o(ws)f(that)h (the)g(distri-)1013 577 y(b)o(ution)11 b(of)h(referring)h(e)o (xpressions)e(dif)o(fers)i(considerably)1013 629 y(between)d(genres.) 1058 689 y(The)16 b(te)o(xts)g(from)h(the)g(two)e(non-narrati)o(v)o(e)h (types,)h(CF)1013 741 y(and)k(CG,)h(contain)e(far)i(more)g(discourse)e (entities)g(and)1013 794 y(far)13 b(less)g(pronouns)f(than)h(the)g (narrati)o(v)o(e)g(genres)h(CK)f(and)1013 846 y(CL.)j(The)h(high)e (number)h(of)h(pronouns)d(in)i(CK)h(and)f(CL)1013 898 y(is)e(partly)h(due)f(to)h(the)g(fact)g(that)g(in)f(one)h(te)o(xt)g (from)h(each)1013 950 y(genre,)e(we)f(ha)o(v)o(e)g(a)h(\002rst)f (person)f(singular)g(narrator)n(.)20 b(CK)1013 1003 y(patterns)e(with)h (CF)h(and)g(CG)g(in)f(the)h(a)o(v)o(erage)g(number)1013 1055 y(of)c(MCUs;)k(the)c(sentences)g(in)g(the)g(sample)h(from)g(mys-) 1013 1107 y(tery)e(\002ction)g(are)h(shorter)f(and)g(ar)o(guably)g (less)g(comple)o(x.)1013 1160 y(CL)k(also)f(has)g(disproportionally)e (fe)o(w)j(deadend)f(refer)o(-)1013 1212 y(ents.)26 b(The)15 b(high)g(percentage)g(of)h(deadend)f(referents)h(in)1013 1264 y(CK)c(is)g(due)g(to)g(the)f(fact)i(that)e(two)g(of)i(the)e(te)o (xts)h(deal)g(with)1013 1317 y(relationship)i(between)j(two)f(people.) 32 b(These)17 b(four)g(dis-)1013 1369 y(course)e(referents)h(account)g (for)g(the)g(4)g(longest)e(corefer)o(-)1013 1421 y(ence)d(chains)g(in)f (CK)i(\(85,)f(96,)g(109,)g(and)g(127)g(mentions\).)1058 1481 y(T)l(wo)22 b(annotators)e(\(the)j(authors,)h(both)d(trained)h (lin-)1013 1534 y(guists\),)10 b(hand-labeled)h(the)h(te)o(xts)f(with)g (co-speci\002cation)1013 1586 y(information)d(based)h(on)h(the)f (speci\002cations)f(for)i(the)g(Mes-)1013 1638 y(sage)19 b(Understanding)f(Coreference)i(task)f(\(Hirschman)1013 1691 y(&)13 b(Chinchor)g(\(1997\);)g(for)h(theoretical)e(reasons,)h(we) h(did)1013 1743 y(not)e(mark)i(re\003e)o(xi)o(v)o(e)h(pronouns)d(and)h (appositi)o(v)o(es)e(as)j(co-)1013 1795 y(specifying\).)26 b(The)16 b(MCUs)g(were)g(labelled)f(by)g(the)h(sec-)1013 1847 y(ond)h(author)n(.)33 b(All)17 b(referring)h(e)o(xpressions)e (were)j(anno-)1013 1900 y(tated)c(with)g(agreement)i(and)f(sortal)f (class)h(information.)1013 1952 y(Labels)9 b(were)i(placed)g(using)e (the)h(GUI-based)g(annotation)1013 2004 y(tool)h(R)r Fk(E)r(F)r(E)r(R)r(E)r(E)k Fr(\(DeCristofaro)c(et)g(al.,)h(1999\).)1058 2065 y(The)h(annotators)f(de)o(v)o(eloped)g(the)h(Sortal)h(Class)f (anno-)1013 2117 y(tation)f(guidelines)g(on)h(the)h(basis)f(of)g(two)g (training)f(te)o(xts.)1013 2169 y(Then,)i(both)e(labellers)h(annotated) f(two)h(te)o(xts)g(from)h(each)1013 2221 y(genre)k(independently)f (\(eight)h(in)g(total\).)36 b(These)18 b(eight)1013 2274 y(te)o(xts)e(were)h(used)g(to)g(determine)g(the)f(reliability)f(of)i (the)1013 2326 y(sortal)e(class)h(coding)f(scheme.)30 b(Since)17 b(sortal)e(class)h(an-)1013 2378 y(notation)11 b(is)i(intrinsically)d(hard,)k(the)f(annotators)f(looked)1013 2431 y(up)e(the)h(senses)f(of)h(the)g(head)g(noun)f(of)h(each)h (referring)f(NP)1013 2483 y(that)i(was)h(not)f(a)i(pronoun)e(or)h(a)g (proper)g(name)h(in)f(W)l(ord-)1013 2535 y(Net.)f(Each)c(sense)f(was)h (mapped)g(directly)f(to)g(one)h(or)g(more)1013 2588 y(of)k(the)h(ten)f (classes)h(gi)o(v)o(en)f(in)g(T)l(able)h(2.)22 b(The)14 b(annotators)1013 2640 y(then)c(chose)h(the)g(adequate)g(sense.)1058 2700 y(The)i(reliability)e(of)i(the)g(annotations)d(were)k(measured)p eop %%Page: 4 4 4 3 bop 25 33 a Fo(Genre)p 150 47 2 47 v 50 w(words)49 b(ref.)13 b(e)o(xpr)n(.)50 b(entities)f(sequ.)h(MCUs)g Fn(\045)10 b Fo(pron.)426 b Fn(\045)10 b Fo(deadend)50 b(med.)12 b(len.)p 0 49 1964 2 v 25 81 a(CF)p 150 95 2 47 v 100 w(6097)67 b(1725)116 b(1223)86 b(125)73 b(304)98 b(19.59)p Fn(\045)9 b Fo(\(1.8)p Fn(\045)p Fo(,)h(0.3)p Fn(\045)p Fo(,)h(58.3)p Fn(\045)p Fo(\))69 b(89.78)p Fn(\045)104 b Fo(3)25 128 y(CG)p 150 142 V 93 w(6103)67 b(1707)116 b(1290)86 b(120)73 b(269)98 b(16.17)p Fn(\045)9 b Fo(\(9.8)p Fn(\045)p Fo(,)h(1.1)p Fn(\045)p Fo(,)h(4)p Fn(\045)p Fo(\))121 b(90.70)p Fn(\045)104 b Fo(2)25 175 y(CK)p 150 189 V 93 w(6020)67 b(1848)116 b(1071)86 b(113)73 b(386)98 b(36.15)p Fn(\045)9 b Fo(\(19.5)p Fn(\045)p Fo(,)h(1.2)p Fn(\045)p Fo(,)g(56.1)p Fn(\045)p Fo(\))49 b(89.45)p Fn(\045)104 b Fo(2)25 222 y(CL)p 150 236 V 98 w(6018)67 b(1846)116 b(954)107 b(170)73 b(477)98 b(35.64)p Fn(\045)9 b Fo(\(14.0)p Fn(\045)p Fo(,)h(1.5)p Fn(\045)p Fo(,)g(53.6)p Fn(\045)p Fo(\))49 b(80.09)p Fn(\045)104 b Fo(4)0 334 y Fr(T)l(able)12 b(3:)k(Rele)o(v)o(ant)c(quantitati)o(v)o (e)e(characteristics)i(of)g(the)g(te)o(xts.)17 b(A)m(v)o(erage)c (length:)h(2020)e(words,)f(120)h(MCUs.)18 b Fq(sequ.:)0 386 y Fr(number)c(of)f(sequences)g(of)h(co-specifying)e(referring)i(e)o (xpressions.)20 b Fq(\045)13 b(deadend:)18 b Fr(percentage)c(of)f (discourse)g(entities)0 439 y(mentioned)c(only)f(once.)14 b Fq(\045)c(pr)n(onouns:)h Fr(percentage)f(of)g(all)f(referring)h(e)o (xpressions)e(realized)i(as)f(pronouns,)g(in)g(brackets:)0 491 y(perc.)27 b(of)15 b(\002rst)h(person)e(singular)g(pronouns,)h (perc.)27 b(of)16 b(second)e(person)h(singular)f(pronouns,)h(perc.)27 b(of)15 b(third)g(person)0 543 y(singular)8 b(masculine)g(and)h (feminine)g(pronouns.)j Fq(med.)d(len.:)k Fr(median)d(length)d(of)j (sequences)e(of)h(co-specifying)f(referring)0 596 y(e)o(xpressions)0 718 y(with)j(Cohen')n(s)h Fl(\024)g Fr(\(Cohen,)h(1960;)e(Carletta,)i (1996\).)j(Co-)0 770 y(hen)10 b(\(1960\))f(sho)o(ws)f(that)h(a)h Fl(\024)h Fr(between)e(0.68)g(and)h(0.80)g(al-)0 822 y(lo)o(ws)e(tentati)o(v)o(e)h(conclusions,)f(while)h Fl(\024)h(>)g Fr(0.80)f(indicates)0 875 y(reliable)i(annotations.)i(F)o (or)g(genres)e(CF)h(\()p Fl(\024)g Fj(=)g Fr(0.83\),)g(CK)0 927 y(\()p Fl(\024)j Fj(=)f Fr(0.84\))g(and)g(CL)g(\()p Fl(\024)h Fj(=)f Fr(0.83\),)h(the)f(sortal)f(class)g(an-)0 979 y(notations)8 b(were)i(indeed)g(reliable,)g(b)o(ut)f(not)g(for)h (genre)g(CG)0 1031 y(\()p Fl(\024)17 b Fj(=)f Fr(0.63\).)29 b(Ne)o(v)o(ertheless,)17 b(o)o(v)o(erall,)h(the)e(sortal)f(class)0 1084 y(annotations)e(were)i(reliable)g(\()p Fl(\024)g Fj(=)g Fr(0.8\).)26 b(Problems)15 b(are)0 1136 y(mainly)d(due)g(to)g (the)h(abstract)f(classes)g(Concept,)g(Action,)0 1188 y(Ev)o(ent,)21 b(State,)h(and)d(Property)m(.)38 b(Abstract)19 b(head)g(nouns)0 1241 y(sometimes)e(ha)o(v)o(e)g(se)o(v)o(eral)g (senses)g(that)f(\002t)i(the)e(conte)o(xt)0 1293 y(almost)f(equally)h (well,)h(b)o(ut)e(that)h(lead)f(to)h(dif)o(ferent)g(sor)o(-)0 1345 y(tal)11 b(classes.)j(Another)c(problem)h(is)g(metaphorical)g (usage.)0 1398 y(This)e(e)o(xplains)h(the)g(bad)g(results)f(for)h(CG,)h (which)f(features)0 1450 y(man)o(y)i(abstract)e(discourse)g(entities.)0 1549 y Fu(4)50 b(T)-5 b(owards)12 b(a)h(Pr)o(obabilistic)75 1603 y(Genr)o(e-Independent)d(Model)0 1675 y Fr(In)f(this)g(section,)g (we)g(in)n(v)o(estigate)f(to)h(what)g(e)o(xtent)g(the)g(fac-)0 1728 y(tors)f(proposed)g(in)g(section)g(2.2)h(in\003uence)g(the)g (decision)e(to)0 1780 y(prominalize.)13 b(F)o(or)c(the)g(purpose)f(of)h (the)g(statistical)e(analy-)0 1832 y(sis,)k(pronominalization)d(is)j (modelled)f(by)h(a)h(feature)f(Pro.)0 1885 y(F)o(or)g(a)f(gi)o(v)o(en)f (referring)h(e)o(xpression,)g(that)f(feature)h(has)g(the)0 1937 y(v)o(alue)16 b(\223P\224)h(if)g(the)f(referring)h(e)o(xpression)e (is)i(a)g(personal)0 1989 y(or)f(a)h(possessi)o(v)o(e)d(pronoun,)j (else)f(\223N\224.)h(W)l(e)g(model)f(this)0 2042 y(v)o(ariable)11 b(with)f(a)h(binomial)f(distrib)o(ution.)688 2025 y Fi(1)0 2126 y Fp(4.1)45 b(How)11 b(do)g(the)h(F)o(actors)f(Affect)102 2179 y(Pr)o(onominalization?)0 2246 y Fr(First,)18 b(we)e(e)o(xamine)h (for)g(all)f(nine)g(factors)g(if)g(there)g(is)g(a)0 2298 y(statistical)7 b(association)g(between)i(these)g(factors)f(and)h(Pro.) 0 2351 y(Standard)h(non-parametric)g(tests)f(sho)o(w)h(a)g(strong)f (associ-)0 2403 y(ation)j(between)g(all)h(nine)f(factors)g(and)h(Pro.) 707 2386 y Fi(2)745 2403 y Fr(This)f(holds)p 0 2440 375 2 v 53 2467 a Fh(1)67 2483 y Fg(F)o(or)i(all)f(statistical)h (calculations)d(and)i(for)g(the)g(logistic)g(re)o(gres-)0 2526 y(sion)c(analyses)f(reported)i(belo)o(w)n(,)g(we)f(used)g(R)h (\(Ihaka)g(&)g(Gentleman,)0 2569 y(1996\).)53 2599 y Fh(2)67 2614 y Fg(W)m(e)17 b(used)d(the)i(Kruskal-W)m(allis)h(test)f (for)h(the)f(ordinal)h(Ambig)0 2657 y(v)o(ariable)8 b(and)g(the)g Ff(\037)269 2641 y Fe(2)286 2657 y Fg(-test)h(for)g(the)g(other)o(,)f (nominal,)h(v)o(ariables.)h(Since)0 2700 y(\002rst)h(mentions)e(and)h (deadends)d(are)j(coded)f(by)h(the)g(character)g(\223D\224)f(in)1013 718 y Fr(both)i(for)j(all)e(referring)h(e)o(xpressions)f(and)h(for)g (those)f(that)1013 770 y(occur)19 b(in)f(sequences)h(of)g (co-specifying)e(referring)j(e)o(x-)1013 822 y(pressions.)32 b(All)17 b(of)g(the)h(tests)e(were)j(signi\002cant)d(at)h(the)1013 875 y Fl(p)e(<)i Fj(0)p Fl(:)p Fj(001)p Fr(-le)o(v)o(el,)12 b(with)g(the)h(e)o(xception)f(of)h(P)o(ar:)18 b(for)c(e)o(x-)1013 927 y(pressions)8 b(that)i(are)h(part)g(of)f(co-speci\002cation)g (sequences)1013 979 y(the)g(ef)o(fect)i(of)f(that)g(factor)g(is)f(not)h (signi\002cant.)1058 1036 y(In)i(the)g(ne)o(xt)g(analysis)e(step,)j(we) f(determine)g(which)f(of)1013 1088 y(the)k(feature)h(v)o(alues)f(are)i (associated)e(disproportionall)o(y)1013 1140 y(often)g(with)f (pronouns,)i(and)f(which)g(v)o(alues)f(tend)h(to)g(be)1013 1193 y(associated)10 b(with)h(full)h(NPs.)k(More)d(speci\002cally)m(,)f (we)g(test)1013 1245 y(for)g(each)h(feature-v)o(alue)f(pair)g(if)h(the) f(pronominalization)1013 1297 y(probability)c(is)j(signi\002cantly)e (higher)i(or)g(lo)o(wer)g(than)g(that)1013 1350 y(computed)h(o)o(v)o (er)i(\(a\))f(the)g(complete)g(data)g(set,)h(\(b\))f(all)g(re-)1013 1402 y(ferring)19 b(e)o(xpressions)f(in)g(sequences)h(of)h (co-specifying)1013 1454 y(referring)15 b(e)o(xpressions,)g(\(c\))h (all)f(third)f(person)h(referring)1013 1506 y(e)o(xpressions)10 b(in)h(sequences.)k(Almost)c(all)g(feature)h(v)o(alues)1013 1559 y(sho)o(w)i(highly)g(signi\002cant)g(ef)o(fects)i(for)f(\(a\))h (and)g(\(b\),)h(b)o(ut)1013 1611 y(some)e(of)h(these)f(ef)o(fects)h(v)o (anish)e(in)i(condition)d(\(c\).)28 b(Be-)1013 1663 y(lo)o(w)m(,)10 b(we)i(report)f(on)f(associations)f(which)i(are)h(signi\002cant)1013 1716 y(at)f Fl(p)h(<)h Fj(0)p Fl(:)p Fj(001)d Fr(under)h(all)g(three)g (conditions.)1058 1772 y(Unsurprisingly)m(,)h(there)j(is)f(a)g(strong)f (ef)o(fect)i(of)f(agree-)1013 1825 y(ment)h(v)o(alues:)21 b(NPs)15 b(referring)h(to)e(the)h(\002rst)h(and)f(second)1013 1877 y(person)10 b(are)h(always)f(pronominalized,)f(and)i(third)f (person)1013 1929 y(masculine)h(or)h(feminine)g(NPs,)h(which)e(can)i (refer)g(to)f(per)o(-)1013 1981 y(sons,)d(are)h(pronominalized)f(more)h (frequently)f(than)g(third)1013 2034 y(person)h(neuter)i(and)f(third)g (person)g(plural.)j(Pronouns)d(are)1013 2086 y(strongly)c(preferred)j (if)f(the)f(distance)h(to)f(the)h(antecedent)g(is)1013 2138 y(0)g(or)h(1)g(MCUs.)k(Referring)c(e)o(xpressions)f(are)h(more)h (likely)1013 2191 y(to)k(be)h(pronominalized)e(in)h(subject)g(position) e(than)i(as)h(a)1013 2243 y(PP)c(adjunct,)g(and)g(referring)g(e)o (xpressions)e(with)i(adjuncts)1013 2295 y(as)18 b(antecedents)g(are)h (also)f(pronominalized)f(less)h(often)1013 2348 y(than)11 b(those)g(with)g(antecedents)g(in)g(subject)g(or)h(object)f(po-)1013 2400 y(sition.)22 b(There)15 b(is)f(a)h(clear)h(preference)f(for)g (pronouns)e(as)1013 2452 y(possessi)o(v)o(e)18 b(determiners,)23 b(and)e(referring)g(e)o(xpressions)1013 2504 y(that)15 b(co-specify)g(with)g(an)h(antecedent)f(possessi)o(v)o(e)f(pro-)1013 2557 y(noun)i(are)i(highly)e(likely)g(to)h(be)h(pronominalised.)32 b(W)l(e)p 1013 2614 V 1013 2657 a Fg(both)11 b(Dist)h(and)f(Dist4,)i (both)e(are)h(treated)g(as)f(a)g(cate)o(gorical)h(v)o(ariable)1013 2700 y(by)c(R.)i(F)o(or)f(more)h(on)e(these)g(tests,)i(see)e (\(Agresti,)i(1990\).)p eop %%Page: 5 5 5 4 bop 0 46 a Fr(also)11 b(notice)g(strong)g(genre-independent)f(ef)o (fects)i(of)f(par)o(-)0 98 y(allelism.)i(Although)8 b(at)i(\002rst)h (glance,)f(Ambig)g(appears)g(to)0 150 y(ha)o(v)o(e)i(a)f(signi\002cant) f(ef)o(fect)i(as)g(well,)f(\(median)g(ambiguity)0 203 y(for)k(nouns)f(is)g(3,)i(median)f(ambiguity)f(for)h(pronouns)e(0\),)0 255 y(closer)i(inspection)e(re)o(v)o(eals)j(that)e(this)h(is)f(mainly)h (due)g(to)0 307 y(\002rst)f(and)f(second)g(person)g(and)h(third)e (person)h(masculine)0 359 y(and)e(feminine)g(pronouns.)45 412 y(The)22 b(sortal)f(classes)g(sho)o(w)g(a)h(number)g(of)g (interest-)0 464 y(ing)14 b(patterns)h(\(cf.)h(T)l(able)f(4\).)26 b(Not)15 b(only)f(do)g(the)h(classes)0 516 y(dif)o(fer)j(in)f(the)g (percentage)h(of)g(deadend)f(entities,)i(there)0 569 y(are)k(also)e(marked)h(dif)o(ferences)g(in)g(pronominalizabil-)0 621 y(ity)m(.)52 b(There)24 b(appear)h(to)e(be)h(three)g(groups)f(of)i (sortal)0 673 y(classes:)12 b(Person/Group,)d(with)g(the)h(lo)o(west)e (rate)j(of)f(dead-)0 726 y(end)25 b(entities)f(and)h(the)g(highest)e (percentage)j(of)f(pro-)0 778 y(nouns)16 b(\226)h(not)g(only)g(due)g (to)g(the)g(\002rst)g(and)g(second)g(per)o(-)0 830 y(son)e(personal)f (pronouns)f(\226)j(,)g(Location/PhysObj,)e(with)0 883 y(roughly)h(two)g(thirds)g(of)i(all)f(entities)f(not)g(in)h(sequences)0 935 y(and)22 b(a)h(signi\002cantly)d(lo)o(wer)h(pronominalization)f (rate,)0 987 y(and)d(Concept/Action/Ev)o(ent/Property)o(/State/Con)o (cept,)0 1039 y(with)11 b(o)o(v)o(er)i(80\045)f(deadend)g(entities.)k (W)n(ithin)11 b(this)g(group,)0 1092 y(Action,)27 b(Ev)o(ent,)g(and)d (Concept)f(are)i(pronominalized)0 1144 y(more)10 b(frequently)d(than)i (State)g(and)g(Property)m(.)k(T)n(ime)d(is)e(the)0 1196 y(least)17 b(frequently)g(pronominalized)f(class.)32 b(An)18 b(impor)o(-)0 1249 y(tant)9 b(reason)g(for)h(the)f(dif)o (ference)h(between)f(Loc)g(and)g(T)n(ime)0 1301 y(might)g(be)g(that)g (T)n(imes)h(are)g(almost)e(always)g(referred)j(back)0 1353 y(to)i(by)h(temporal)f(adv)o(erbs,)i(while)e(locations,)g (especially)0 1406 y(to)o(wns)f(and)h(countries,)f(can)i(also)e(be)i (accessed)f(via)g(third)0 1458 y(person)e(neuter)f(personal)h (pronouns.)45 1510 y(Interactions)j(between)h(the)g(factors)g(and)g (genre)g(were)0 1562 y(e)o(xamined)i(by)f(an)g(analysis)f(of)i(de)o (viance)f(run)g(on)g(a)g(\002t-)0 1615 y(ted)d(logistic)e(re)o (gression)h(model;)i(signi\002cance)e(was)g(cal-)0 1667 y(culated)i(using)f(the)h(F-test.)23 b(All)14 b(factors)g(e)o(xcept)g (for)h(P)o(ar)0 1719 y(sho)o(w)g(strong)f(\()p Fl(p)21 b(<)h Fj(0)p Fl(:)p Fj(001)p Fr(\))14 b(interactions)g(with)h(Genre.)0 1772 y(In)10 b(other)g(words,)f(the)h(in\003uence)g(of)g(all)g(factors) f(b)o(ut)h(paral-)0 1824 y(lelism)j(on)g(pronominalization)e(is)i (mediated)h(by)f(Genre.)0 1876 y(There)19 b(are)h(two)e(main)h(reasons) g(for)g(this)f(ef)o(fect:)30 b(\002rst,)0 1929 y(some)14 b(genres)f(contain)g(far)h(more)g(\002rst)g(and)f(second)g(per)o(-)0 1981 y(son)e(personal)f(pronouns,)h(which)f(adds)h(to)g(the)g(weight)g (of)0 2033 y(Agree,)19 b(and)d(second,)i(te)o(xts)e(which)g(are)h (about)f(persons)0 2086 y(and)d(the)h(actions)e(of)i(persons,)g(such)f (as)h(the)f(te)o(xts)g(in)g(CK)0 2138 y(and)d(CL,)g(tend)g(to)f(use)h (more)g(pronouns)f(than)g(te)o(xts)g(which)0 2190 y(are)j(mainly)e(ar)o (gumentati)o(v)o(e)h(or)g(e)o(xpository)m(.)0 2270 y Fp(4.2)45 b(Which)12 b(F)o(actors)f(ar)o(e)h(Important?)0 2334 y Fr(T)l(o)e(separate)h(the)f(important)f(from)i(the)f (unimportant)e(fac-)0 2386 y(tors,)15 b(man)o(y)g(researchers)g(use)g (decision)e(and)h(re)o(gression)0 2438 y(trees,)24 b(mostly)c(the)h (binary)f(CAR)m(T)i(v)o(ariant)e(\(Breiman)0 2491 y(et)11 b(al.,)h(1984\).)h(W)l(e)f(use)e(a)i(dif)o(ferent)e(kind)g(of)h(model)g (here,)0 2543 y(logistic)j(re)o(gression,)i(which)f(is)g(especially)g (well)g(suited)0 2595 y(for)e(cate)o(gorical)g(data)g(analysis)f(\(cf.) i(e)o(g.)g(Agresti)e(\(1990\))0 2648 y(or)j(K)o(essler)f(et)h(al.)26 b(\(1997\)\).)f(In)15 b(this)f(model,)i(the)f(v)o(alue)0 2700 y(of)i(the)f(binary)g(tar)o(get)h(v)o(ariable)f(is)g(predicted)g (by)g(a)h(lin-)1013 46 y(ear)i(combination)e(of)i(the)g(predictor)f(v)o (ariables.)36 b(V)-5 b(ari-)1013 98 y(able)15 b(weights)g(indicate)g (the)h(importance)f(of)h(a)h(v)o(ariable)1013 150 y(for)f (classi\002cation:)21 b(the)16 b(higher)f(the)h(absolute)f(v)o(alue)g (of)1013 203 y(the)10 b(weight,)h(the)g(more)h(important)e(it)g(is.) 1058 259 y(Logistic)e(re)o(gression)h(models)g(are)h(not)f(only)g(e)o (v)o(aluated)1013 311 y(by)k(their)g(performance)h(on)f(training)f(and) i(test)e(data.)22 b(W)l(e)1013 363 y(could)11 b(easily)h(construct)g(a) h(perfect)g(model)f(of)h(an)o(y)g(train-)1013 416 y(ing)e(data)h(set)g (with)g Fl(n)h Fr(v)o(ariables,)f(where)g Fl(n)h Fr(is)f(the)g(size)g (of)1013 468 y(the)f(data)g(set.)k(But)c(we)h(need)f(models)g(that)g (are)h(small,)f(yet)1013 520 y(predict)j(the)h(tar)o(get)h(v)o(alues)e (well.)27 b(A)15 b(suitable)f(criterion)1013 572 y(is)i(the)h(Akaike)f (Information)h(Criterion)f(\(AIC,)i(Akaike)1013 625 y(\(1974\)\),)12 b(which)f(punishes)f(both)h(models)h(that)f(do)h(not)f(\002t)1013 677 y(the)17 b(data)h(well)f(and)h(models)f(that)g(ha)o(v)o(e)i(too)e (man)o(y)h(pa-)1013 729 y(rameters.)28 b(The)15 b(quality)f(of)i(a)g (factor)f(is)h(judged)e(by)h(the)1013 782 y(amount)c(of)i(v)o(ariation) e(in)h(the)g(tar)o(get)g(v)o(ariable)g(that)g(it)g(e)o(x-)1013 834 y(plains.)g(Note)d(that)f(increased)i(prediction)d(accurac)o(y)k (does)1013 886 y(not)j(necessarily)g(mean)i(an)f(increase)g(in)g(the)g (amount)f(of)1013 939 y(v)o(ariation)c(e)o(xplained.)15 b(As)d(the)f(model)h(itself)f(is)g(a)h Fq(contin-)1013 991 y(uous)i Fr(approximation)g(of)h(the)g Fq(cate)n(gorical)f Fr(distinctions)1013 1043 y(to)8 b(be)h(modelled,)h(it)e(may)i(occur)f (that)f(the)h(numerical)g(v)o(ari-)1013 1096 y(ation)k(in)h(the)h (predictions)d(decreases,)17 b(b)o(ut)d(that)f(this)h(de-)1013 1148 y(crease)f(is)f(lost)f(when)i(re-translating)e(numerical)h (predic-)1013 1200 y(tions)d(into)h(cate)o(gorical)h(ones.)1058 1256 y(The)g(factors)f(for)h(our)f(model)h(were)g(selected)f(based)h (on)1013 1308 y(the)f(follo)o(wing)f(procedure:)k(W)l(e)f(start)e(with) g(a)h(model)g(that)1013 1361 y(always)c(predicts)h(the)h(most)f (frequent)h(class.)k(W)l(e)d(then)e(de-)1013 1413 y(termine)h(which)g (factor)g(pro)o(vides)f(the)h(greatest)g(reduction)1013 1465 y(in)i(the)g(AIC,)h(add)g(that)f(factor)g(to)g(the)g(model)h(and)f (retrain.)1013 1518 y(This)e(step)g(is)h(repeated)g(until)f(all)h (factors)g(ha)o(v)o(e)g(been)h(used)1013 1570 y(or)i(adding)f(another)h (factor)g(does)g(not)f(yield)h(an)o(y)g(signi\002-)1013 1622 y(cant)e(impro)o(v)o(ements)g(an)o(ymore.)1539 1606 y Fi(3)1058 1678 y Fr(This)22 b(procedure)h(in)n(v)o(ariably)f(yields)g (the)h(sequence)1013 1731 y(Dist4,)9 b(Agree,)h(Class,)h(F)o(ormAnte,)f (Syn,)h(SynAnte,)f(Am-)1013 1783 y(big,)f(P)o(ar)n(,)j(both)c(when)h (training)g(models)g(on)g(the)g(complete)1013 1835 y(data)f(set)h(and)f (when)h(training)e(on)i(a)g(single)f(genre.)13 b(Inspec-)1013 1888 y(tion)h(of)h(the)h(AIC)g(v)o(alues)e(suggests)g(that)h (parallelism)g(is)1013 1940 y(the)c(least)g(important)f(factor)n(,)i (and)g(does)f(not)g(impro)o(v)o(e)g(the)1013 1992 y(AIC)e (signi\002cantly)m(.)i(Therefore,)f(we)f(will)e(discard)i(it)f(from) 1013 2044 y(the)15 b(outset.)27 b(All)15 b(other)g(factors)g(are)h (maintained)f(in)g(the)1013 2097 y(initial)c(full)i(model.)22 b(This)13 b(model)g(is)g(purely)g(additi)o(v)o(e;)h(it)1013 2149 y(does)e(not)g(include)f(interactions)g(between)i(factors.)18 b(This)1013 2201 y(approach)c(allo)o(ws)f(us)h(to)h(\002lter)f(out)g (factors)g(which)g(only)1013 2254 y(mediate)9 b(the)g(in\003uence)h(of) f(other)g(factors,)h(b)o(ut)f(do)g(not)g(e)o(x-)1013 2306 y(ert)j(an)o(y)g(signi\002cant)f(in\003uence)g(of)h(their)g(o)o (wn.)k(Note)c(that)1013 2358 y(this)i(probabilistic)e(model)j(only)f (pro)o(vides)h(a)g(numerical)1013 2411 y(description)10 b(of)i(ho)o(w)g(its)f(factors)h(af)o(fect)g(pronominaliza-)1013 2463 y(tion)h(in)h(our)g(corpus.)24 b(As)14 b(such,)h(it)f(is)g(not)g (equi)o(v)o(alent)f(to)1013 2515 y(a)d(theoretical)g(model,)h(b)o(ut)f (rather)g(pro)o(vides)g(data)g(for)h(fur)o(-)p 1013 2571 375 2 v 1065 2599 a Fh(3)1080 2614 y Fg(W)m(e)g(e)o(xcluded)e(Dist)i (from)h(this)f(stepwise)e(procedure,)i(since)e(the)1013 2657 y(rele)o(v)o(ant)k(information)i(is)f(co)o(v)o(ered)g(already)f (by)h(Dist4,)i(which)e(fur)o(-)1013 2700 y(thermore)9 b(has)f(much)g(fe)o(wer)i(v)o(alues.)p eop %%Page: 6 6 6 5 bop 25 33 a Fo(Class)p 389 47 2 47 v 313 w(Act)49 b(Concept)h(Ev)o(ent)g(Group)57 b(Loc)51 b(Pers)f(PhysObj)f(Prop)g (State)h(T)o(ime)p 0 49 1838 2 v 25 81 a(\045)10 b(deadend)p 389 95 2 47 v 207 w(84.1)115 b(80.0)73 b(88.0)83 b(46.1)49 b(63.3)h(17.3)120 b(65.5)55 b(88.5)60 b(87.8)j(92.9)25 128 y(\045)10 b(pronouns)p 389 142 V 210 w(6.2)136 b(8,5)94 b(6.0)83 b(28.4)70 b(5.7)50 b(63.4)120 b(10.2)76 b(2.5)k(3.2)k(0.3)25 175 y(\045)10 b(pron.)i(\(sequences\))p 389 189 V 51 w(32.5)115 b(29.6)73 b(33.3)83 b(51.6)49 b(15.4)h(73.8)120 b(27.2)55 b(21.4)60 b(23,7)83 b(4.5)0 266 y Fr(T)l(able)17 b(4:)26 b(Results)16 b(for)h(Sortal)g(Classes.)32 b(\045)17 b(deadend:)25 b(percentage)17 b(of)g(deadend)g(entities;)h(\045)f (pronouns:)24 b(percent)0 318 y(pronominalised,)10 b(\045)i(pron.)k (\(sequences:)e(percent)e(pronominalised)e(relati)o(v)o(e)h(to)g(all)h (occurrences)g(in)f(co-speci\002cation)0 371 y(sequences)p 0 453 979 2 v 240 500 2 47 v 309 485 a Fo(CF)85 b(CG)g(CK)90 b(CL)p 813 500 V 123 w(all)p 0 501 979 2 v 25 534 a Fn(\045)10 b Fo(correct)p 240 548 2 47 v 102 w Fd(97.1)70 b Fo(93.5)g(93.6)h(91.5) p 813 548 V 91 w(93.1)25 581 y(AIC)p 240 595 V 169 w Fd(324.7)49 b Fo(654.8)h(786.1)f(904.0)p 813 595 V 49 w(2685.8)25 628 y Fn(\045)10 b Fo(v)o(ariation)p 240 642 V 69 w Fd(83.0)70 b Fo(65.4)g(70.1)h(65.4)p 813 642 V 91 w(68.7)0 719 y Fr(T)l(able)20 b(5:)31 b(Quality)18 b(of)i(models)f(\002tted)g(to)h(each)g(of)g(the)0 771 y(genre-speci\002c)d(corpora)h(\(CF)l(,)g(CG,)g(CK,)g(CL\))g(and)f(the) 0 823 y(complete)g(data)g(set)g(\(all\).)33 b Fj(\045)17 b Fr(correct:)26 b(correctly)17 b(pre-)0 875 y(dicted)11 b(pronominalization)f(decition,)h(AIC:)h(Akaike)f(In-)0 928 y(formation)23 b(Criterion,)j Fj(\045)e Fr(v)o(ariation:)37 b(percentage)24 b(of)0 980 y(original)16 b(v)o(ariation)f(in)i(the)g (data)g(\(as)g(measured)h(by)e(de-)0 1032 y(viance\))11 b(accounted)g(for)g(by)g(the)g(model)0 1210 y(ther)g(theoretical)f (interpretation.)45 1265 y(Results)21 b(of)g(a)h(\002rst)f(e)o(v)o (aluation)f(of)h(the)g(full)g(model)0 1317 y(are)g(summarized)g(in)f(T) l(able)g(5.)42 b(The)20 b(model)g(can)h(e)o(x-)0 1369 y(plain)c(more)h(than)f(two)f(thirds)g(of)i(the)f(v)o(ariation)f(in)h (the)0 1422 y(complete)9 b(data)g(set)g(and)g(can)h(predict)e (pronominalization)0 1474 y(quite)17 b(well)f(on)i(the)f(data)g(it)g (was)g(\002tted)g(on.)33 b(The)17 b(mat-)0 1526 y(ter)12 b(becomes)g(more)h(interesting)d(when)h(we)i(e)o(xamine)f(the)0 1578 y(genre-speci\002c)j(results.)25 b(Although)13 b(o)o(v)o(erall)i (prediction)0 1631 y(performance)d(remains)f(stable,)g(the)g(model)g (is)g(ob)o(viously)0 1683 y(suited)e(better)h(to)f(some)h(genres)g (than)g(to)f(others.)k(The)d(best)0 1735 y(results)j(are)j(obtained)d (on)h(CF)l(,)i(the)e(worst)g(on)g(CL)g(\(mys-)0 1788 y(tery)j(\002ction\).)33 b(In)17 b(the)g(CL)h(te)o(xts,)h(MCUs)f(are)g (short,)g(a)0 1840 y(third)9 b(of)h(all)g(referring)g(e)o(xpressions)e (are)j(pronouns,)e(there)0 1892 y(is)15 b(no)f(\002rst)h(person)f (singular)g(narrator)n(,)j(and)e(most)f(para-)0 1945 y(graphs)h(which)f(mention)g(persons)h(are)h(about)e(the)h(inter)o(-)0 1997 y(action)10 b(between)h(two)f(persons.)0 2072 y Fp(The)23 b(Relative)c(Importance)j(of)g(F)o(actors.)45 b Fr(All)21 b(v)o(al-)0 2125 y(ues)16 b(of)g(Dist4)e(ha)o(v)o(e)i(v)o (ery)h(strong)d(weights)h(in)g(all)h(mod-)0 2177 y(els;)i(this)e(is)g (clearly)g(the)g(most)g(important)g(factor)n(.)29 b(The)0 2229 y(same)10 b(goes)e(for)i(Agree,)g(where)f(the)g(\002rst)g(and)g (second)f(per)o(-)0 2282 y(son)j(are)i(strong)d(signs)h(of)h (pronominalization,)e(and,)i(to)f(a)0 2334 y(lesser)i(de)o(gree,)j (masculine)d(and)g(feminine)h(third)e(person)0 2386 y(singular)n(.)27 b(The)16 b(most)f(important)g(distinction)f(pro)o(vided)0 2438 y(by)g(Class)h(appears)g(to)f(be)h(that)f(between)h(Persons,)g (non-)0 2491 y(Persons,)j(and)e(T)n(imes.)30 b(This)15 b(holds)g(as)h(well)g(when)g(the)0 2543 y(model)e(is)g(only)g(trained)g (on)g(third)g(person)f(referring)i(e)o(x-)0 2595 y(pressions.)26 b(F)o(or)16 b(singular)e(referring)i(e)o(xpressions,)g(Per)o(-)0 2648 y(sonhood)c(information)h(is)g(re\003ected)h(in)g(gender)n(,)g(b)o (ut)f(not)0 2700 y(for)g(plural)f(referring)h(e)o(xpressions.)18 b(Another)12 b(important)1013 492 y(in\003uence)k(is)g(the)h(form)g(of) f(the)h(antecedent.)30 b(The)17 b(syn-)1013 545 y(tactic)11 b(function)f(of)i(the)f(referring)h(e)o(xpression)f(and)g(of)h(its)1013 597 y(antecedent)e(are)i(less)f(important,)f(as)h(is)g(ambiguity)m(.) 1058 650 y(In)18 b(order)g(to)g(e)o(xamine)g(the)g(importance)g(of)g (the)g(fac-)1013 702 y(tors)e(in)h(more)h(detail,)g(we)g(re\002tted)f (the)g(models)g(on)f(the)1013 755 y(complete)10 b(data)h(set)f(while)g (omitting)g(one)g(or)h(more)h(of)e(the)1013 807 y(three)j(central)g (features)g(Dist4,)f(Agree,)j(and)e(Class.)19 b(The)1013 859 y(results)12 b(are)i(summarized)f(in)g(T)l(able)h(6.)20 b(The)14 b(most)e(inter)o(-)1013 912 y(esting)k(\002nding)g(is)h(that)g (e)o(v)o(en)h(if)g(we)f(e)o(xclude)h(all)f(three)1013 964 y(factors,)k(prediction)e(accurac)o(y)i(only)e(drops)g(by)h Fj(3)p Fl(:)p Fj(2\045)p Fr(.)1013 1016 y(This)15 b(means)i(that)e(the) h(remaining)g(4)h(factors)e(also)h(con-)1013 1068 y(tain)e(most)h(of)h (the)f(rele)o(v)o(ant)g(information,)h(b)o(ut)e(that)h(this)1013 1121 y(information)7 b(is)i(coded)g(more)g(\223ef)o(\002ciently\224,)h (so)f(to)f(speak,)1013 1173 y(in)15 b(the)g(\002rst)h(three.)28 b(Speaking)15 b(of)h(these)f(factors,)i(ques-)1013 1225 y(tions)c(concerning)h(the)h(ef)o(fect)h(of)f(sortal)f(class)h (remains.)1013 1278 y(Remarkably)h(enough,)i(when)e(sortal)g(class)g (is)g(omitted,)1013 1330 y(accurac)o(y)g Fq(incr)n(eases)f Fr(by)g Fj(0)p Fl(:)p Fj(7\045)p Fr(.)26 b(The)15 b(increase)h(in)f (AIC)1013 1382 y(can)j(be)h(e)o(xplained)f(by)g(a)h(decrease)g(in)f (the)g(amount)g(of)1013 1435 y(e)o(xplained)13 b(v)o(ariation.)23 b(A)15 b(third)e(result)h(is)g(that)g(informa-)1013 1487 y(tion)d(about)g(the)h Fq(form)f(of)h(the)g(antecedent)f Fr(can)i(substitute)1013 1539 y(for)c(distance)g(information,)h(if)f (that)h(information)e(is)i(miss-)1013 1591 y(ing.)k(Both)d(v)o (ariables)f(code)h(the)h(crucial)f(distinctions)d(be-)1013 1644 y(tween)j(e)o(xpressions)f(that)h(e)o(v)o(oke)g(entities)f(and)h (those)g(that)1013 1696 y(access)j(e)o(v)o(oked)f(entities.)22 b(Furthermore,)16 b(a)e(pronominal)1013 1748 y(antecedent)9 b(tends)g(to)h(occur)g(at)g(a)g(distance)f(of)h(less)g(than)f(2)1013 1801 y(MCUs.)29 b(The)17 b(contrib)o(ution)c(of)k(syntactic)e(function) g(re-)1013 1853 y(mains)j(stable)g(and)h(signi\002cant,)h(albeit)e (comparati)o(v)o(ely)1013 1905 y(unimportant.)1013 1975 y Fp(Pr)o(edictive)31 b(P)o(ower:)45 b Fr(T)l(o)31 b(e)o(v)o(aluate)h (the)f(predicti)o(v)o(e)1013 2028 y(po)o(wer)10 b(of)g(the)h(models)f (computed)g(so)g(far)n(,)h(we)g(determine)1013 2080 y(the)i(percentage) g(of)h(correctly)f(predicted)g(pronouns)e(and)1013 2132 y(NPs.)29 b(The)16 b(performance)i(of)e(the)g(trained)g(models)g(was) 1013 2185 y(compared)11 b(to)g(two)f(v)o(ery)h(simple)g(algorithms:) 1013 2272 y Fp(Algorithm)f(A:)21 b Fr(Always)g(choose)j(the)f(most)h (frequent)1103 2324 y(option)10 b(\(i.e.)15 b(noun\).)1013 2403 y Fp(Algorithm)10 b(B:)21 b Fr(If)28 b(the)e(antecedent)h(is)f(in) h(the)g(same)1103 2456 y(MCU,)12 b(or)e(if)g(it)g(is)g(in)f(the)h(pre)o (vious)f(MCU)i(and)f(there)1103 2508 y(is)g(no)f(ambiguity)m(,)g (choose)g(a)i(pronoun;)d(else)i(choose)1103 2560 y(a)i(noun.)1058 2648 y(T)l(able)17 b(7)h(summarises)f(the)g(results)f(of)i(the)f (compari-)1013 2700 y(son.)27 b(T)l(o)16 b(determine)f(the)h(o)o(v)o (erall)g(predicti)o(v)o(e)f(po)o(wer)g(of)p eop %%Page: 7 7 7 6 bop 119 33 a Fo(e)o(xcluded)p 513 47 2 47 v 394 w(\002t)390 b Fn(\045)10 b Fo(e)o(xplained)g(v)o(ariation)p 513 94 V 538 80 a(AIC)61 b(\045correct)p 845 94 V 50 w(Dist4)49 b(Agree)h(Class)g(PF)o(orm)g(Syn)f(PSyn)h(Ambig)p 94 95 1762 2 v 119 128 a(none)p 513 142 2 47 v 338 w(2686)f(92.6)p 845 142 V 127 w(54.4)67 b(21.1)78 b(5.7)88 b(3.8)110 b(2.3)62 b Fc(0.5)86 b Fo(1.1)119 175 y(Class)p 513 189 V 329 w(2785)49 b Fd(93.3)p 845 189 V 127 w Fo(54.4)67 b(21.1)78 b(n.a.)j(4.7)110 b(2.8)62 b(0.5)86 b(1.1)119 222 y(Agree)p 513 236 V 318 w(2984)49 b Fd(92.6)p 845 236 V 127 w Fo(54.4)67 b(n.a.)92 b Fd(14.3)67 b Fo(6.2)110 b(2.7)62 b(0.6)86 b(1.1)119 269 y(Dist4)p 513 283 V 328 w(3346)49 b(90.2)p 845 283 V 127 w(n.a.)81 b(35.8)d(6.1)88 b Fd(32)120 b Fo(3)93 b(0.8)86 b Fc(0.1)119 316 y Fo(Dist4)9 b(+)i(Class)p 513 330 V 195 w(3443)49 b(90.2)p 845 330 V 127 w(n.a.)81 b(35.8)d(n.a.)j Fd(33.7)89 b Fo(3.4)62 b(0.8)86 b Fc(0.1)119 363 y Fo(Dist4)9 b(+)i(Agree)p 513 377 V 184 w(3597)49 b(89.6)p 845 377 V 127 w(n.a.)81 b(n.a.)92 b Fd(31.4)67 b(35.4)89 b Fo(3.1)62 b(0.8)86 b Fc(0.2)119 410 y Fo(Agree)11 b(+)g(Class)p 513 424 V 183 w(3098)49 b Fd(92.6)p 845 424 V 127 w Fo(54.4)67 b(n.a.)92 b(n.a.)81 b(13.11)68 b(3.5)62 b Fc(0.5)86 b Fo(3.6)119 457 y(Dist4)9 b(+)i(Agree)g(+)g(Class)p 513 471 V 49 w(3739)49 b(89.4)p 845 471 V 127 w(n.a.)81 b(n.a.)92 b(n.a.)81 b Fd(52.62)68 b Fo(4)93 b(0.7)86 b(1.7)0 597 y Fr(T)l(able)12 b(6:)i(Ef)o(fect)d(of)h(lea)o(ving)f(out)g(an)o(y)g (one)h(of)f(the)g(three)h(most)f(important)g(factors)g(on)g(model)g (\002t.)16 b Fq(italics:)d Fr(signi\002cance)0 650 y(is)e Fl(p)h(<)h Fj(0)p Fl(:)p Fj(05)p Fr(,)e(for)g(all)g(other)g(factors,)g Fl(p)h(<)h Fj(0)p Fl(:)p Fj(005)d Fr(or)h(better)n(.)p 0 731 951 2 v 212 778 2 47 v 476 764 a Fd(test)g(data)f(set)p 212 825 V 259 810 a Fo(CF)65 b(CG)f(CK)69 b(CL)p 701 825 V 207 w(all)p 0 826 951 2 v 25 859 a(Alg.)10 b(A)p 212 873 2 47 v 99 w(80.4)50 b(83.8)f(63.8)h(65.4)p 701 873 V 175 w(72.8)25 906 y(Alg.)10 b(B)p 212 920 V 101 w(91.1)50 b Fd(93.0)f Fo(88.6)h(84.7)p 701 920 V 175 w(89.4)25 953 y(Model)p 212 967 V 103 w(96.5)g(92.2)f Fd(91.8)h(90.9)p 701 967 V 49 w Fo(92.6)10 b Fb(\006)h Fo(0.02)25 1000 y(w/o)f(Class)p 212 1014 V 49 w Fd(96.8)50 b Fo(92.4)f(91.7)h(90.7)p 701 1014 V 49 w Fd(93.0)10 b Fb(\006)h Fd(0.01)p 0 1015 951 2 v 0 1142 a Fr(T)l(able)16 b(7:)22 b(Results)14 b(of)i(algorithms)e(vs.)27 b(models)15 b(on)g(test)0 1194 y(data)f(in)f Fj(\045)h Fr(correct)g(prediction)e (if)i(referring)g(e)o(xpression)0 1247 y(is)i(to)h(be)g(pronominalised) e(or)i(not.)30 b(Setup)17 b(for)g(genres:)0 1299 y(model)e(is)h (trained)f(on)g(three)h(genres,)h(tested)d(on)i(the)f(re-)0 1351 y(maining)10 b(one)0 1489 y(the)18 b(model,)h(we)f(used)g(10-fold) e(cross-v)o(alidation.)32 b(Al-)0 1541 y(gorithm)15 b(A)i(always)d (fares)j(worst,)f(while)g(algorithm)f(B,)0 1594 y(which)8 b(is)g(based)g(mainly)g(on)g(distance,)h(the)f(strongest)f(fac-)0 1646 y(tor)17 b(in)g(the)g(model,)i(performs)e(quite)g(well.)32 b(Its)17 b(o)o(v)o(erall)0 1698 y(performance)f(is)e Fj(3)p Fl(:)p Fj(2\045)h Fr(belo)o(w)f(that)g(of)h(the)g(full)g(model,) 0 1751 y(and)e Fj(3)p Fl(:)p Fj(6\045)f Fr(belo)o(w)g(that)h(of)g(the)f (full)h(model)g(without)e(sor)o(-)0 1803 y(tal)h(class)g(information.)k (It)c(e)o(v)o(en)h(outperforms)f(the)g(mod-)0 1855 y(els)g(on)g(CG,)h (which)f(has)g(the)g(lo)o(west)f(percentage)h(of)h(Per)o(-)0 1907 y(sons)e(\()p Fj(12)p Fl(:)p Fj(9\045)g Fr(vs.)h Fj(35\045)g Fr(for)g(CF)h(and)f Fj(43)p Fl(:)p Fj(4\045)f Fr(and)h Fj(43)p Fl(:)p Fj(5\045)0 1960 y Fr(for)g(CL)h(and)f(CK\).)h (F)o(or)g(all)f(other)g(genres,)h(the)f(statistical)0 2012 y(models)d(outperform)g(the)h(simple)f(heuristics.)j(Excluding)0 2064 y(sortal)d(class)h(information)f(can)i(boost)e(prediction)f (perfor)o(-)0 2117 y(mance)15 b(on)f(unseen)g(data)g(by)g(as)g(much)h (as)f Fj(0)p Fl(:)p Fj(4\045)f Fr(for)i(the)0 2169 y(complete)k (corpus.)36 b(The)19 b(apparent)g(contradiction)d(be-)0 2221 y(tween)j(this)e(\002nding)h(and)h(the)f(results)g(reported)h(in)f (the)0 2274 y(pre)o(vious)g(section)g(can)i(be)f(e)o(xplained)g(if)g (we)h(consider)0 2326 y(that)8 b(not)g(only)f(were)i(some)g(sortal)e (classes)h(comparati)o(v)o(ely)0 2378 y(rare)k(in)f(the)f(data)h (\(Property)m(,)h(Ev)o(ent\),)f(b)o(ut)f(that)h(our)g(sortal)0 2431 y(class)g(de\002nition)e(may)j(still)e(be)h(too)f(\002ne-grained.) 45 2491 y(W)l(e)30 b(e)o(v)o(aluated)e(the)h(genre-independence)f(of)h (the)0 2543 y(model)10 b(by)g(training)f(on)h(three)g(genres)g(and)g (testing)f(on)h(the)0 2595 y(fourth.)20 b(The)13 b(results)f(sho)o(w)g (that)h(the)g(model)g(fares)h(quite)0 2648 y(well)c(for)g(genre)g(CF)l (,)i(which)e(is)f(also)h(the)g(genre)g(where)h(the)0 2700 y(o)o(v)o(erall)i(\002t)g(was)g(best)f(\(see)h(T)l(able)g(5\).)20 b(W)l(e)14 b(therefore)f(hy-)1013 771 y(pothesize)7 b(that)i(the)f (decrease)i(in)e(performance)i(is)f(mainly)1013 823 y(due)i(to)f(the)h (model)g(itself,)g(not)f(to)h(the)g(training)f(data.)k(The)1013 875 y(results)h(presented)i(in)f(both)g(T)l(able)h(5)g(and)g(7)g(sho)o (w)f(that)1013 927 y(although)e(the)i(model)g(we)h(ha)o(v)o(e)g(found)e (is)h(not)g(quite)f(as)1013 980 y(genre-independent)8 b(as)i(we)g(would)e(want)h(it)h(to)f(be,)i(it)e(pro-)1013 1032 y(vides)e(a)j(reasonable)e(\002t)h(to)f(all)g(the)h(genres)f(we)h (e)o(xamined.)1013 1147 y Fu(5)49 b(Futur)o(e)12 b(W)l(ork)1013 1230 y Fr(W)l(e)d(ha)o(v)o(e)h(described)e(a)h(probabilistic)e(model)h (of)h(pronom-)1013 1282 y(inalization)k(that)i(is)h(able)f(to)g (correctly)h(predict)f(93)p Fj(\045)g Fr(of)1013 1334 y(all)9 b(pronouns)f(in)i(a)g(corpus)f(that)g(consists)f(of)i(twelv)o (e)g(te)o(xts)1013 1387 y(from)h(four)f(dif)o(ferent)g(genres.)k(Since) d(the)f(model)h(was)f(de-)1013 1439 y(ri)o(v)o(ed)g(from)h(a)g(limited) e(corpus)h(and)g(a)h(limited)f(number)g(of)1013 1491 y(genres,)i(we)g(cannot)f(guarantee)g(that)g(our)h(results)f(are)h(ap-) 1013 1544 y(plicable)c(to)h(all)g(te)o(xts)g(without)e (modi\002cations.)12 b(But)e(since)1013 1596 y(its)f(performance)j(on)e (our)g(sample)h(is)f(consistently)e(abo)o(v)o(e)1013 1648 y(90)p Fj(\045)15 b Fr(correct,)j(we)e(are)g(reasonably)f (con\002dent)g(that)g(our)1013 1700 y(main)g(\002ndings)f(will)g(hold)g (for)i(a)f(wide)g(v)o(ariety)g(of)g(te)o(xts)1013 1753 y(and)9 b(te)o(xt)h(types.)j(In)c(particular)n(,)h(we)g(isolated)f(se)o (v)o(eral)h(fac-)1013 1805 y(tors)k(which)h(are)h(rob)o(ust)e (predictors)g(of)h(pronominaliza-)1013 1857 y(tion)e(across)h(genres:) 19 b(distance)14 b(from)g(last)g(mention)f(and)1013 1910 y(agreement,)k(and)f(to)f(a)h(certain)g(e)o(xtent)f(the)g(form)i(of)e (the)1013 1962 y(antecedent,)c(which)f(appears)h(to)g(be)g(a)g(good)g (substitute)d(if)1013 2014 y(the)h(other)g(two)f(factors)h(are)h(not)f (a)o(v)o(ailable.)k(All)c(three)g(fea-)1013 2067 y(tures)f(can)i(be)g (computed)e(on)h(the)g(basis)g(of)g(a)h(chunk)f(parse,)1013 2119 y(a)18 b(rough)f(morphosyntactic)f(analysis)h(of)h(the)g (resulting)1013 2171 y(NPs,)f(and)e(co-speci\002cation)g(sequences.)27 b(In)15 b(computa-)1013 2224 y(tional)f(terms,)j(the)o(y)f(are)g (comparati)o(v)o(ely)f(cheap.)28 b(Lar)o(ge)1013 2276 y(corpora)8 b(can)h(be)g(annotated)f(relati)o(v)o(ely)g(quickly)f(with) h(this)1013 2328 y(information,)15 b(which)f(can)i(then)e(be)h(used)g (for)g(statistical)1013 2380 y(pronoun)9 b(generation.)1058 2438 y(The)18 b(comparati)o(v)o(ely)f(e)o(xpensi)o(v)o(e)h(sortal)f (class)h(anno-)1013 2491 y(tation,)f(on)g(the)h(other)e(hand,)j(was)e (not)g(v)o(ery)g(important)1013 2543 y(in)d(the)g(\002nal)h(model;)h (in)e(fact,)i(prediction)d(accurac)o(y)j(de-)1013 2595 y(creased)23 b(when)h(sortal)e(class)h(was)g(included.)50 b(There)1013 2648 y(are)18 b(two)e(main)h(reasons)g(for)h(this:)25 b(\002rst,)19 b(the)e(proposed)1013 2700 y(sortal)c(class)h(annotation) f(scheme)i(needs)f(further)h(work,)p eop %%Page: 8 8 8 7 bop 0 46 a Fr(second,)22 b(the)e(relationship)e(between)h(sortal)g (class)h(and)0 98 y(pronominalization)14 b(may)k(well)e(be)h(too)f (intricate)g(to)h(be)0 150 y(modelled)11 b(by)f(the)h(factor)g(Class)g (alone.)45 205 y(W)l(e)22 b(set)e(out)g(to)g(\002nd)h(a)g (genre-independent)e(model)0 258 y(of)h(pronominalization.)37 b(The)20 b(model)f(we)h(found)f(per)o(-)0 310 y(forms)10 b(quite)g(well,)g(b)o(ut)f(genre)h(still)f(considerably)g(af)o(fects)0 362 y(its)k(performance.)24 b(Where)15 b(does)e(the)h(remaining,)h(une) o(x-)0 415 y(plained)f(v)o(ariation)f(come)i(from?)26 b(The)14 b(v)o(ariation)f(might)0 467 y(be)f(just)f(that)h(\226)g (stylistic)e(v)o(ariation.)16 b(It)c(might)g(stem)g(from)0 519 y(one)17 b(of)g(the)g(traditional)e(factors)i(that)f(we)h(did)g (not)f(take)0 572 y(into)c(account)h(here,)i(such)e(as)g(thematic)h (role.)20 b(Ho)o(we)o(v)o(er)n(,)0 624 y(we)11 b(suspect)f(that)g(the)h (crucial)f(factor)h(at)g(play)f(here)h(is)f(dis-)0 676 y(course)h(structure)f(\(McCoy)i(&)g(Strube,)f(1999\).)0 752 y Fp(Acknowledgements)45 b Fr(W)l(ork)15 b(on)g(this)f(paper)h(was) f(be-)0 804 y(gun)9 b(while)g(Michael)h(Strube)g(was)f(a)i (postdoctoral)d(fello)o(w)0 856 y(at)16 b(the)g(Institute)f(for)h (Research)h(in)f(Cogniti)o(v)o(e)f(Science,)0 909 y(Uni)o(v)o(ersity)10 b(of)j(Pennsylv)o(ania,)e(and)h(Maria)h(W)l(olters)e(vis-)0 961 y(ited)k(the)g(Institute)f(for)h(a)h(week)g(in)f(summer)h(1999.)26 b(W)l(e)0 1013 y(would)10 b(like)g(to)h(thank)g(Kathleen)g(McCoy)m(,)h (Jonathan)e(De-)0 1066 y(Cristofaro,)16 b(and)f(the)g(three)g(anon)o (ymous)g(re)o(vie)o(wers)g(for)0 1118 y(their)c(comments)g(on)g (earlier)g(stages)g(of)g(this)f(work.)0 1223 y Fu(Refer)o(ences)0 1293 y Fo(Agresti,)j(Alan)f(\(1990\).)21 b Fc(Cate)n(gorical)11 b(Data)h(Analysis)p Fo(.)22 b(Ne)o(w)45 1340 y(Y)-5 b(ork,)11 b(N.Y)-5 b(.:)13 b(W)n(ile)o(y)m(.)0 1387 y(Akaike,)22 b(H.)e(\(1974\).)43 b(A)20 b(ne)o(w)f(look)g(at)g(statistical)g(model) 45 1433 y(identi\002cation.)e Fc(IEEE)12 b(T)n(r)o(ansactions)e(A)o (utomatic)g(Contr)n(ol)p Fo(,)45 1480 y(19:716\226722.)0 1527 y(Azzam,)k(Saliha,)d(K)o(e)o(vin)g(Humphre)o(ys)g(&)h(Robert)e (Gaizauskas)45 1574 y(\(1998\).)74 b(Ev)o(aluating)28 b(a)i(focus-based)f(approach)g(to)45 1621 y(anaphora)15 b(resolution.)26 b(In)15 b Fc(Pr)n(oceedings)g(of)e(the)i(17)842 1606 y Fa(th)889 1621 y Fc(In-)45 1668 y(ternational)9 b(Confer)n(ence)j(on)f(Computational)c(Linguistics)45 1715 y(and)j(36)161 1700 y Fa(th)204 1715 y Fc(Annual)f(Meeting)g(of)g (the)h(Association)f(for)g(Com-)45 1762 y(putational)19 b(Linguistics,)j Fo(Montr)r(\264)-16 b(eal,)24 b(Qu)r(\264)-16 b(ebec,)26 b(Canada,)45 1809 y(10\22614)10 b(August)f(1998,)h(pp.)g (74\22678.)0 1856 y(Breiman,)j(Leo,)h(Jerome)f(H.)f(Friedman,)i (Charles)e(J.)g(Stone)g(&)45 1903 y(R.A.)18 b(Olshen)e(\(1984\).)35 b Fc(Classi\002cation)14 b(and)i(Re)n(gr)n(ession)45 1949 y(T)n(r)n(ees)p Fo(.)h(Belmont,)10 b(Cal.:)i(W)m(adsworth)d(and)i (Brooks/Cole.)0 1996 y(Cardie,)20 b(Claire)d(&)h(Kiri)e(W)m(agstaf)o(f) i(\(1999\).)37 b(Noun)17 b(phrase)45 2043 y(coreference)c(as)f (clustering.)k(In)11 b Fc(Pr)n(oceedings)g(of)g(the)f(1999)45 2090 y(SIGD)o(A)n(T)f(Confer)n(ence)g(on)f(Empirical)g(Methods)f(in)h (Natur)o(al)45 2137 y(Language)13 b(Pr)n(ocessing)h(and)f(V)-5 b(ery)16 b(Lar)n(ge)e(Corpor)o(a,)h Fo(Col-)45 2184 y(le)o(ge)c(P)o (ark,)h(Md.,)e(21\22622)g(June)g(1999,)g(pp.)g(82\22689.)0 2231 y(Carletta,)k(Jean)g(\(1996\).)23 b(Assessing)13 b(agreement)i(on)d(classi\002-)45 2278 y(cation)f(tasks:)j(The)e(kappa) f(statistic.)17 b Fc(Computational)7 b(Lin-)45 2325 y(guistics)p Fo(,)j(22\(2\):249\226254.)0 2372 y(Cohen,)19 b(Jacob)f(\(1960\).)36 b(A)17 b(coef)o(\002cient)h(of)f(agreement)h(for)45 2419 y(nominal)10 b(scales.)18 b Fc(Educational)9 b(and)h(Psyc)o(hological)g (Mea-)45 2465 y(sur)n(ement)p Fo(,)i(20:37\22646.)0 2512 y(Dale,)f(Robert)e(\(1992\).)k Fc(Gener)o(ating)c(Referring)i(Expr)n (essions:)45 2559 y(Constructing)18 b(Descriptions)g(in)h(a)h(Domain)e (of)h(Objects)45 2606 y(and)10 b(Pr)n(ocesses)p Fo(.)18 b(Cambridge,)10 b(Mass.:)j(MIT)e(Press.)0 2653 y(DeCristofaro,)16 b(Jonathan,)g(Michael)g(Strube)f(&)h(Kathleen)f(F)m(.)45 2700 y(McCoy)h(\(1999\).)34 b(Building)14 b(a)j(tool)f(for)g (annotating)f(ref-)1058 46 y(erence)k(in)e(discourse.)39 b(In)17 b Fc(A)o(CL)h('99)f(W)l(orkshop)g(on)g(the)1058 93 y(Relationship)h(between)i(Discourse/Dialogue)d(Structur)n(e)1058 139 y(and)9 b(Refer)n(ence,)k(University)d(of)f(Maryland,)g(Maryland,)g (21)1058 186 y(J)o(une,)h(1999)p Fo(,)g(pp.)g(54\22662.)1013 233 y(Fellbaum,)k(Christiane)f(\(Ed.\))h(\(1998\).)25 b Fc(W)l(or)n(dNet:)19 b(An)14 b(Elec-)1058 280 y(tr)n(onic)j(Le)o (xical)i(Database)p Fo(.)38 b(Cambridge,)20 b(Mass.:)29 b(MIT)1058 327 y(Press.)1013 374 y(Fraurud,)11 b(Kari)g(\(1996\).)18 b(Cogniti)o(v)o(e)9 b(ontology)g(and)i(NP)h(form.)1058 421 y(In)19 b(T)m(.)h(Fretheim)f(&)h(J.)f(Gundel)g(\(Eds.\),)j Fc(Refer)n(ence)g(and)1058 468 y(Refer)n(ent)e(Accessibility)p Fo(,)h(pp.)e(65\22687.)f(Amsterdam,)j(The)1058 515 y(Netherlands:)12 b(Benjamins.)1013 562 y(Ge,)g(Niyu,)f(John)f(Hale)i(&)g(Eugene)f (Charniak)g(\(1998\).)17 b(A)11 b(sta-)1058 609 y(tistical)g(approach)i (to)e(anaphora)i(resolution.)20 b(In)12 b Fc(Pr)n(oceed-)1058 655 y(ings)j(of)g(the)h(Sixth)f(W)l(orkshop)g(on)g(V)-5 b(ery)18 b(Lar)n(ge)e(Corpor)o(a,)1058 702 y Fo(Montr)r(\264)-16 b(eal,)10 b(Canada,)h(pp.)f(161\226170.)1013 749 y(Grosz,)15 b(Barbara)g(J.,)h(Ara)o(vind)e(K.)g(Joshi)g(&)g(Scott)g(W)m(einstein) 1058 796 y(\(1995\).)h(Centering:)d(A)e(frame)o(work)h(for)f(modeling)f (the)i(lo-)1058 843 y(cal)k(coherence)h(of)e(discourse.)28 b Fc(Computational)11 b(Linguis-)1058 890 y(tics)p Fo(,)f (21\(2\):203\226225.)1013 937 y(Hirschman,)15 b(L)n(ynette)f(&)g(Nanc)o (y)h(Chinchor)e(\(1997\).)26 b Fc(MUC-)1058 984 y(7)34 b(Cor)n(efer)n(ence)j(T)l(ask)e(De\002nition,)k Fs(http://www.)1058 1031 y(muc.sais.com/proceedings/)p Fo(.)1013 1078 y(Ihaka,)14 b(Ross)f(&)h(Ross)f(Gentleman)h(\(1996\).)23 b(R:)13 b(A)g(language)1058 1125 y(for)g(data)g(analysis)g(and)g(graphics.)24 b Fc(J)o(ournal)12 b(of)h(Computa-)1058 1171 y(tional)8 b(and)i(Gr)o(aphical)f(Statistics)p Fo(,)g(5:299\226314.)1013 1218 y(K)o(essler)n(,)24 b(Brett,)e(Geof)o(fre)o(y)f(Nunber)o(g)f(&)h (Hinrich)f(Sch)s(\250)-17 b(utze)1058 1265 y(\(1997\).)9 b(Automatic)e(detection)g(of)h(te)o(xt)g(genre.)i(In)e Fc(Pr)n(oceed-)1058 1312 y(ings)16 b(of)f(the)i(35)1302 1297 y Fa(th)1352 1312 y Fc(Annual)e(Meeting)g(of)h(the)g(Association) 1058 1359 y(for)11 b(Computational)d(Linguistics)j(and)g(of)g(the)h(8) 1776 1344 y Fa(th)1822 1359 y Fc(Confer)o(-)1058 1406 y(ence)j(of)e(the)g(Eur)n(opean)h(Chapter)f(of)g(the)g(Association)g (for)1058 1453 y(Computational)8 b(Linguistics,)j Fo(Madrid,)h(Spain,)g (7\22612)f(July)1058 1500 y(1997,)f(pp.)g(32\22638.)1013 1547 y(LDC)f(\(1995\).)j Fc(P)m(enn)e(Tr)n(eebank-II)p Fo(.)k(Linguistic)8 b(Data)i(Consor)o(-)1058 1594 y(tium.)g(Uni)o(v)o (ersity)f(of)h(Pennsylv)o(ania,)g(Philadelphia,)f(Penn.)1013 1641 y(McCoy)m(,)14 b(Kathleen)h(F)m(.)f(&)g(Michael)h(Strube)e (\(1999\).)27 b(Gener)o(-)1058 1687 y(ating)13 b(anaphoric)g(e)o (xpressions:)19 b(Pronoun)13 b(or)g(de\002nite)g(de-)1058 1734 y(scription?)24 b(In)12 b Fc(A)o(CL)f('99)h(W)l(orkshop)f(on)g (the)h(Relationship)1058 1781 y(between)c(Discourse/Dialogue)e (Structur)n(e)i(and)g(Refer)n(ence,)1058 1828 y(University)h(of)f (Maryland,)h(Maryland,)f(21)h(J)o(une,)g(1999)p Fo(,)g(pp.)1058 1875 y(63\22671.)1013 1922 y(Poesio,)16 b(Massimo,)h(Renate)e (Henschel,)j(Janet)d(Hitzeman)h(&)1058 1969 y(Rodger)d(Kibble)g (\(1999\).)26 b(Statistical)12 b(NP)i(generation:)19 b(A)1058 2016 y(\002rst)14 b(report.)29 b(In)14 b(R.)h(Kibble)f(&)h(K.) g(v)o(an)g(Deemter)g(\(Eds.\),)1058 2063 y Fc(Pr)n(oceedings)i(of)f (the)g(W)l(orkshop)f(on)h(The)h(Gener)o(ation)f(of)1058 2110 y(Nominal)9 b(Expr)n(essions,)k(11th)d(Eur)n(opean)h(Summer)g(Sc)o (hool)1058 2156 y(on)j(Logic,)g(Language,)g(and)f(Information,)g(Utr)n (ec)o(ht,)j(9-13)1058 2203 y(A)o(ugust)10 b(1999)p Fo(.)1013 2250 y(Sidner)n(,)19 b(Candace)g(L.)g(\(1983\).)38 b(F)o(ocusing)17 b(in)g(the)h(compre-)1058 2297 y(hension)e(of)g(de\002nite)g(anaphora.) 34 b(In)16 b(M.)h(Brady)f(&)h(R.C.)1058 2344 y(Berwick)11 b(\(Eds.\),)i Fc(Computational)8 b(Models)j(of)g(Discourse)p Fo(,)1058 2391 y(pp.)f(267\226330.)f(Cambridge,)h(Mass.:)k(MIT)c (Press.)1013 2438 y(V)-5 b(ossen,)9 b(Piek)f(\(Ed.\))h(\(1998\).)i Fc(Eur)n(oW)l(or)n(dNet:)g(A)e(Multilingua)o(l)1058 2485 y(Database)e(with)g(Le)o(xical)h(Semantic)f(Networks)p Fo(.)j(Dordrecht,)1058 2532 y(The)h(Netherlands:)h(Kluwer)n(.)1013 2579 y(Y)l(eh,)j(Ching-Long)e(&)i(Chris)e(Mellish)g(\(1997\).)28 b(An)14 b(empiri-)1058 2626 y(cal)i(study)e(on)h(the)g(generation)g(of) g(anaphora)g(in)g(Chinese.)1058 2672 y Fc(Computational)6 b(Linguistics)p Fo(,)j(23\(1\):169\226190.)p eop %%Trailer end userdict /end-hook known{end-hook}if %%EOF