-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathslides-dissertation-defense.bib
More file actions
169 lines (156 loc) · 10.8 KB
/
slides-dissertation-defense.bib
File metadata and controls
169 lines (156 loc) · 10.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
% Encoding: UTF-8
% Dempster, Laird & Rubin (1977): journal article presenting the EM algorithm
% for maximum likelihood estimation from incomplete data.
@Article{1977DempsterLairdRubin,
  author = {Dempster, Arthur P. and Laird, Nan M. and Rubin, Donald B.},
  title = {Maximum Likelihood from Incomplete Data via the {EM} Algorithm},
  journal = {Journal of the Royal Statistical Society. Series B (Methodological)},
  year = {1977},
  volume = {39},
  number = {1},
  pages = {1--38},
  issn = {0035-9246},
  abstract = {A broadly applicable algorithm for computing maximum likelihood estimates from incomplete data is presented at various levels of generality. Theory showing the monotone behaviour of the likelihood and convergence of the algorithm is derived. Many examples are sketched, including missing value situations, applications to grouped, censored or truncated data, finite mixture models, variance component estimation, hyperparameter estimation, iteratively reweighted least squares and factor analysis.},
  copyright = {Copyright © 1977 Royal Statistical Society},
  file = {:1977__dempster_laird_rubin__maximum_likelihood_from_incomplete_data_via_the_em_algorithm.pdf:PDF},
  groups = {Expectation Maximization Algorithm, EzmÜnS},
  jstor_articletype = {research-article},
  jstor_formatteddate = {1977},
  language = {English},
  owner = {Toni Dietze},
  publisher = {Wiley for the Royal Statistical Society},
  timestamp = {2013-03-18},
  url = {http://www.jstor.org/stable/2984875},
}
% Carrasco, Oncina & Calera-Rubio (2001): journal article on identifying
% regular tree languages from stochastic samples.
@Article{2001CarrascoOncinaCalera-Rubio,
  author = {Carrasco, Rafael C. and Oncina, Jose and Calera-Rubio, Jorge},
  title = {Stochastic Inference of Regular Tree Languages},
  journal = {Machine Learning},
  year = {2001},
  volume = {44},
  number = {1--2},
  pages = {185--197},
  issn = {0885-6125},
  abstract = {We generalize a former algorithm for regular language identification from stochastic samples to the case of tree languages. It can also be used to identify context-free languages when structural information about the strings is available. The procedure identifies equivalent subtrees in the sample and outputs the hypothesis in linear time with the number of examples. The results are evaluated with a method that computes efficiently the relative entropy between the target grammar and the inferred one.},
  comment = {cf. [1998CarrascoOncinaCalera]},
  doi = {10.1023/A:1010836331703},
  file = {:2001__carrasco_oncina_calera-rubio__stochastic_inference_of_regular_tree_languages.pdf:PDF},
  groups = {grammatical inference},
  keywords = {grammatical inference; stochastic grammars; tree languages},
  language = {English},
  owner = {Toni Dietze},
  publisher = {Kluwer Academic Publishers},
  timestamp = {2015-05-07},
  url = {http://dx.doi.org/10.1023/A:1010836331703},
}
% Carme, Niehren & Tommasi (2004): LNCS contribution proposing stepwise tree
% automata for querying unranked trees.
@InCollection{2004CarmeNiehrenTommasi,
  author = {Carme, Julien and Niehren, Joachim and Tommasi, Marc},
  title = {Querying Unranked Trees with Stepwise Tree Automata},
  booktitle = {Rewriting Techniques and Applications},
  publisher = {Springer Berlin Heidelberg},
  year = {2004},
  editor = {van Oostrom, Vincent},
  volume = {3091},
  series = {Lecture Notes in Computer Science},
  pages = {105--118},
  isbn = {978-3-540-22153-1},
  abstract = {The problem of selecting nodes in unranked trees is the most basic querying problem for XML. We propose stepwise tree automata for querying unranked trees. Stepwise tree automata can express the same monadic queries as monadic Datalog and monadic second-order logic. We prove this result by reduction to the ranked case, via a new systematic correspondence that relates unranked and ranked queries.},
  doi = {10.1007/978-3-540-25979-4_8},
  file = {:2004__carme_niehren_tommasi__querying_unranked_trees_with_stepwise_tree_automata.pdf:PDF},
  groups = {unranked formalisms},
  language = {English},
  owner = {Toni Dietze},
  timestamp = {2015-10-23},
  url = {http://dx.doi.org/10.1007/978-3-540-25979-4_8},
}
% Matsuzaki, Miyao & Tsujii (2005): ACL paper defining PCFG-LA, a PCFG whose
% non-terminal symbols are augmented with latent variables.
@InProceedings{2005MatsuzakiMiyaoTsujii,
  author = {Matsuzaki, Takuya and Miyao, Yusuke and Tsujii, Jun'ichi},
  title = {Probabilistic {CFG} with Latent Annotations},
  booktitle = {Proc.\ of 43rd Annual Meeting of ACL},
  year = {2005},
  series = {ACL '05},
  pages = {75--82},
  address = {Stroudsburg, PA, USA},
  publisher = {Association for Computational Linguistics},
  abstract = {This paper defines a generative probabilistic model of parse trees, which we call PCFG-LA. This model is an extension of PCFG in which non-terminal symbols are augmented with latent variables. Fine-grained CFG rules are automatically induced from a parsed corpus by training a PCFG-LA model using an EM-algorithm. Because exact parsing with a PCFG-LA is NP-hard, several approximations are described and empirically compared. In experiments using the Penn WSJ corpus, our automatically trained model gave a performance of 86.6\% (F1, sentences $\leq$ 40 words), which is comparable to that of an unlexicalized PCFG parser created using extensive manual feature selection.},
  acmid = {1219850},
  comment = {binarization},
  doi = {10.3115/1219840.1219850},
  file = {:2005__matsuzaki_miyao_tsujii__probabilistic_cfg_with_latent_annotations.pdf:PDF},
  groups = {State-Splitting},
  location = {Ann Arbor, Michigan},
  numpages = {8},
  owner = {Toni Dietze},
  timestamp = {2013-02-05},
  url = {http://dx.doi.org/10.3115/1219840.1219850},
}
% Petrov, Barrett, Thibaux & Klein (2006): COLING/ACL paper on tree annotation
% by alternately splitting and merging nonterminal symbols.
@InProceedings{2006PetrovBarrettThibauxKlein,
  author = {Petrov, Slav and Barrett, Leon and Thibaux, Romain and Klein, Dan},
  title = {Learning Accurate, Compact, and Interpretable Tree Annotation},
  booktitle = {COLING/ACL},
  year = {2006},
  series = {ACL-44},
  pages = {433--440},
  address = {Stroudsburg, PA, USA},
  publisher = {Association for Computational Linguistics},
  abstract = {We present an automatic approach to tree annotation in which basic nonterminal symbols are alternately split and merged to maximize the likelihood of a training treebank. Starting with a simple X-bar grammar, we learn a new grammar whose nonterminals are subsymbols of the original nonterminals. In contrast with previous work, we are able to split various terminals to different degrees, as appropriate to the actual complexity in the data. Our grammars automatically learn the kinds of linguistic distinctions exhibited in previous work on manual tree annotation. On the other hand, our grammars are much more compact and substantially more accurate than previous work on automatic annotation. Despite its simplicity, our best grammar achieves an $\mathrm{F}_1$ of 90.2\% on the Penn Treebank, higher than fully lexicalized systems.},
  acmid = {1220230},
  doi = {10.3115/1220175.1220230},
  file = {Paper:2006__petrov_barrett_thibaux_klein__learning_accurate_compact_and_interpretable_tree_annotation.pdf:PDF;Slides:2006__petrov_barrett_thibaux_klein__learning_accurate_compact_and_interpretable_tree_annotation__slides.ppt:PowerPoint},
  groups = {Korrespondenz Nederhof, State-Splitting, done, dissertation.bib},
  localfile = {2006__petrov_barrett_thibaux_klein__learning_accurate_compact_and_interpretable_tree_annotation.pdf},
  location = {Sydney, Australia},
  numpages = {8},
  owner = {Toni Dietze},
  timestamp = {2012-02-10},
  url = {http://dx.doi.org/10.3115/1220175.1220230},
}
% Dietze & Nederhof (2015): FSMNLP paper on generalizing probabilistic regular
% tree grammars by merging nonterminals.
@InProceedings{2015DietzeNederhof,
  author = {Dietze, Toni and Nederhof, Mark-Jan},
  title = {Count-based State Merging for Probabilistic Regular Tree Grammars},
  booktitle = {Proc.\ of the FSMNLP Conference},
  year = {2015},
  month = jun,
  abstract = {We present an approach to obtain language models from a tree corpus using probabilistic regular tree grammars (prtg). Starting with a prtg only generating trees from the corpus, the prtg is generalized step by step by merging nonterminals. We focus on bottom-up deterministic prtg to simplify the calculations.},
  file = {:2015__dietze_nederhof__count-based_state_merging_for_probabilistic_regular_tree_grammars.pdf:PDF},
  groups = {own publications},
  owner = {Toni Dietze},
  timestamp = {2016-07-18},
  url = {https://aclanthology.org/W15-4804/},
}
% Dietze (2016): StatFSM workshop paper on equivalences between ranked and
% unranked weighted tree automata via binarization. The comment field lists
% known issues in the published version.
@InProceedings{2016Dietze,
  author = {Dietze, Toni},
  title = {Equivalences between Ranked and Unranked Weighted Tree Automata via Binarization},
  booktitle = {Proc.\ of the StatFSM Workshop (ACL SIGFSM)},
  year = {2016},
  pages = {1--10},
  publisher = {Association for Computational Linguistics},
  comment = {Issues:
Section 3.1/related: We need the wsa in the wuta to share state set, transitions, and final weights instead of having disjoint sets. Otherwise f in the proof of Theorem 7 is no bijection.
Section 3.3/Theorem 8: t ∈ U_Σ instead of t ∈ T_Σ
Section 4.2: For mixed binarization, the wfsa in the constructed wuta are not consistent.},
  file = {submitted version:2016__dietze__equivalences_between_ranked_and_unranked_weighted_tree_automata_via_binarization__submitted_version.pdf:PDF;published version:2016__dietze__equivalences_between_ranked_and_unranked_weighted_tree_automata_via_binarization.pdf:PDF},
  groups = {own publications},
  location = {Berlin, Germany},
  owner = {Toni Dietze},
  timestamp = {2016-08-08},
  url = {https://aclanthology.org/W16-2401/},
}
% Osterholzer, Dietze & Herrmann (2016): LATA paper proving that linear
% context-free tree languages are not closed under inverse linear tree
% homomorphisms. Typed as a proceedings paper so that classic BibTeX styles
% print both the paper title and the booktitle.
@InProceedings{2016OsterholzerDietzeHerrmann,
  author = {Osterholzer, Johannes and Dietze, Toni and Herrmann, Luisa},
  title = {Linear Context-Free Tree Languages and Inverse Homomorphisms},
  booktitle = {Proc.\ of LATA Conference},
  year = {2016},
  editor = {Dediu, Adrian-Horia and Janou{\v{s}}ek, Jan and Mart{\'\i}n-Vide, Carlos and Truthe, Bianca},
  pages = {478--489},
  publisher = {Springer International Publishing},
  address = {Cham},
  isbn = {978-3-319-30000-9},
  abstract = {We prove that the class of linear context-free tree languages is not closed under inverse linear tree homomorphisms. The proof is by contradiction: we encode Dyck words into a context-free tree language and prove that its preimage under a certain linear tree homomorphism cannot be generated by any context-free tree grammar. However, the closure can be proved for the linear monadic context-free tree languages.},
  doi = {10.1007/978-3-319-30000-9_37},
  file = {:2016__osterholzer_dietze_herrmann__linear_context-free_tree_languages_and_inverse_homomorphisms.pdf:PDF},
  groups = {own publications},
  owner = {Toni Dietze},
  timestamp = {2016-07-18},
  url = {http://dx.doi.org/10.1007/978-3-319-30000-9_37},
}
@Comment{jabref-meta: databaseType:bibtex;}
@Comment{jabref-meta: saveOrderConfig:specified;year;false;author;false;bibtexkey;false;}