Universal Dependencies

home edit page issue tracker

This page pertains to UD version 2.

It appears that you have Javascript disabled. Please consider enabling Javascript for this page to see the visualizations.

Introduction

Universal Dependencies (UD) is a project that is developing cross-linguistically consistent treebank annotation for many languages, with the goal of facilitating multilingual parser development, cross-lingual learning, and parsing research from a language typology perspective. The annotation scheme is based on an evolution of (universal) Stanford dependencies (de Marneffe et al., 2006, 2008, 2014), Google universal part-of-speech tags (Petrov et al., 2012), and the Interset interlingua for morphosyntactic tagsets (Zeman, 2008). The general philosophy is to provide a universal inventory of categories and guidelines to facilitate consistent annotation of similar constructions across languages, while allowing language-specific extensions when necessary.

This is illustrated in the following parallel examples from English, Bulgarian, Czech and Swedish, where the main grammatical relations involving a passive verb, a nominal subject and an oblique agent are the same, but where the concrete grammatical realization varies.



  
    input
    brat
    info
  
# visual-style 4 2 nsubj:pass	color:blue
# visual-style 4 7 obl	color:blue
1	The	the	DET	_	Definite=Def|PronType=Art	2	det	_	_
2	dog	dog	NOUN	_	Gender=Neut|Number=Sing	4	nsubj:pass	_	_
3	was	be	AUX	_	Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin	4	aux:pass	_	_
4	chased	chase	VERB	_	Tense=Past|VerbForm=Part	0	ROOT	_	_
5	by	by	ADP	_	_	7	case	_	_
6	the	the	DET	_	Definite=Def|PronType=Art	7	det	_	_
7	cat	cat	NOUN	_	Gender=Neut|Number=Sing	4	obl	_	_
8	.	.	PUNCT	_	_	4	punct	_	_
{
    "entities": [
        [ "embedded-1-S1-T1", "DET", [ [ 0, 3 ] ] ],
        [ "embedded-1-S1-T2", "NOUN", [ [ 4, 7 ] ] ],
        [ "embedded-1-S1-T3", "AUX", [ [ 8, 11 ] ] ],
        [ "embedded-1-S1-T4", "VERB", [ [ 12, 18 ] ] ],
        [ "embedded-1-S1-T5", "ADP", [ [ 19, 21 ] ] ],
        [ "embedded-1-S1-T6", "DET", [ [ 22, 25 ] ] ],
        [ "embedded-1-S1-T7", "NOUN", [ [ 26, 29 ] ] ],
        [ "embedded-1-S1-T8", "PUNCT", [ [ 30, 31 ] ] ]
    ],
    "attributes": [ [ "embedded-1-S1-A1", "Definite", "embedded-1-S1-T1", "Def" ], [ "embedded-1-S1-A2", "PronType", "embedded-1-S1-T1", "Art" ], [ "embedded-1-S1-A3", "Gender", "embedded-1-S1-T2", "Neut" ], [ "embedded-1-S1-A4", "Number", "embedded-1-S1-T2", "Sing" ], [ "embedded-1-S1-A5", "Mood", "embedded-1-S1-T3", "Ind" ], [ "embedded-1-S1-A6", "Number", "embedded-1-S1-T3", "Sing" ], [ "embedded-1-S1-A7", "Tense", "embedded-1-S1-T3", "Past" ], [ "embedded-1-S1-A8", "VerbForm", "embedded-1-S1-T3", "Fin" ], [ "embedded-1-S1-A9", "Tense", "embedded-1-S1-T4", "Past" ], [ "embedded-1-S1-A10", "VerbForm", "embedded-1-S1-T4", "Part" ], [ "embedded-1-S1-A11", "Definite", "embedded-1-S1-T6", "Def" ], [ "embedded-1-S1-A12", "PronType", "embedded-1-S1-T6", "Art" ], [ "embedded-1-S1-A13", "Gender", "embedded-1-S1-T7", "Neut" ], [ "embedded-1-S1-A14", "Number", "embedded-1-S1-T7", "Sing" ] ],
    "relations": [
        [ "embedded-1-S1-R0", "det", [ [ "arg1", "embedded-1-S1-T2" ], [ "arg2", "embedded-1-S1-T1" ] ] ],
        [ "embedded-1-S1-R1", "nsubj:pass", [ [ "arg1", "embedded-1-S1-T4" ], [ "arg2", "embedded-1-S1-T2" ] ] ],
        [ "embedded-1-S1-R2", "aux:pass", [ [ "arg1", "embedded-1-S1-T4" ], [ "arg2", "embedded-1-S1-T3" ] ] ],
        [ "embedded-1-S1-R3", "ROOT", [ [ "arg1", "embedded-1-S1-T0" ], [ "arg2", "embedded-1-S1-T4" ] ] ],
        [ "embedded-1-S1-R4", "case", [ [ "arg1", "embedded-1-S1-T7" ], [ "arg2", "embedded-1-S1-T5" ] ] ],
        [ "embedded-1-S1-R5", "det", [ [ "arg1", "embedded-1-S1-T7" ], [ "arg2", "embedded-1-S1-T6" ] ] ],
        [ "embedded-1-S1-R6", "obl", [ [ "arg1", "embedded-1-S1-T4" ], [ "arg2", "embedded-1-S1-T7" ] ] ],
        [ "embedded-1-S1-R7", "punct", [ [ "arg1", "embedded-1-S1-T4" ], [ "arg2", "embedded-1-S1-T8" ] ] ]
    ],
    "comments": [ [ "embedded-1-S1-T1", "AnnotatorNotes", "Lemma: the" ], [ "embedded-1-S1-T2", "AnnotatorNotes", "Lemma: dog" ], [ "embedded-1-S1-T3", "AnnotatorNotes", "Lemma: be" ], [ "embedded-1-S1-T4", "AnnotatorNotes", "Lemma: chase" ], [ "embedded-1-S1-T5", "AnnotatorNotes", "Lemma: by" ], [ "embedded-1-S1-T6", "AnnotatorNotes", "Lemma: the" ], [ "embedded-1-S1-T7", "AnnotatorNotes", "Lemma: cat" ], [ "embedded-1-S1-T8", "AnnotatorNotes", "Lemma: ." ] ],
    "styles": [ [ [ "embedded-1-S1-T4", "embedded-1-S1-T2", "nsubj:pass" ], "color", "blue" ], [ [ "embedded-1-S1-T4", "embedded-1-S1-T7", "obl" ], "color", "blue" ] ],
    "sentlabels": [ "1" ],
    "text": "The dog was chased by the cat .",
    "error": false
}



  
    input
    brat
    info
  
# visual-style 3 1 nsubj:pass	color:blue
# visual-style 3 5 obl	color:blue
1	Кучето	куче	NOUN	_	Definite=Def|Gender=Neut|Number=Sing	3	nsubj:pass	_	_
2	се	се	PRON	_	Case=Acc|PronType=Prs|Reflex=Yes	3	expl:pass	_	_
3	преследваше	преследвам	VERB	_	Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin	0	root	_	_
4	от	от	ADP	_	_	5	case	_	_
5	котката	котка	NOUN	_	Definite=Def|Gender=Fem|Number=Sing	3	obl	_	_
6	.	.	PUNCT	_	_	3	punct	_	_
{
    "entities": [
        [ "embedded-2-S1-T1", "NOUN", [ [ 0, 6 ] ] ],
        [ "embedded-2-S1-T2", "PRON", [ [ 7, 9 ] ] ],
        [ "embedded-2-S1-T3", "VERB", [ [ 10, 21 ] ] ],
        [ "embedded-2-S1-T4", "ADP", [ [ 22, 24 ] ] ],
        [ "embedded-2-S1-T5", "NOUN", [ [ 25, 32 ] ] ],
        [ "embedded-2-S1-T6", "PUNCT", [ [ 33, 34 ] ] ]
    ],
    "attributes": [ [ "embedded-2-S1-A1", "Definite", "embedded-2-S1-T1", "Def" ], [ "embedded-2-S1-A2", "Gender", "embedded-2-S1-T1", "Neut" ], [ "embedded-2-S1-A3", "Number", "embedded-2-S1-T1", "Sing" ], [ "embedded-2-S1-A4", "Case", "embedded-2-S1-T2", "Acc" ], [ "embedded-2-S1-A5", "PronType", "embedded-2-S1-T2", "Prs" ], [ "embedded-2-S1-A6", "Reflex", "embedded-2-S1-T2", "Yes" ], [ "embedded-2-S1-A7", "Aspect", "embedded-2-S1-T3", "Imp" ], [ "embedded-2-S1-A8", "Mood", "embedded-2-S1-T3", "Ind" ], [ "embedded-2-S1-A9", "Number", "embedded-2-S1-T3", "Sing" ], [ "embedded-2-S1-A10", "Person", "embedded-2-S1-T3", "3" ], [ "embedded-2-S1-A11", "Tense", "embedded-2-S1-T3", "Past" ], [ "embedded-2-S1-A12", "VerbForm", "embedded-2-S1-T3", "Fin" ], [ "embedded-2-S1-A13", "Definite", "embedded-2-S1-T5", "Def" ], [ "embedded-2-S1-A14", "Gender", "embedded-2-S1-T5", "Fem" ], [ "embedded-2-S1-A15", "Number", "embedded-2-S1-T5", "Sing" ] ],
    "relations": [
        [ "embedded-2-S1-R0", "nsubj:pass", [ [ "arg1", "embedded-2-S1-T3" ], [ "arg2", "embedded-2-S1-T1" ] ] ],
        [ "embedded-2-S1-R1", "expl:pass", [ [ "arg1", "embedded-2-S1-T3" ], [ "arg2", "embedded-2-S1-T2" ] ] ],
        [ "embedded-2-S1-R2", "root", [ [ "arg1", "embedded-2-S1-T0" ], [ "arg2", "embedded-2-S1-T3" ] ] ],
        [ "embedded-2-S1-R3", "case", [ [ "arg1", "embedded-2-S1-T5" ], [ "arg2", "embedded-2-S1-T4" ] ] ],
        [ "embedded-2-S1-R4", "obl", [ [ "arg1", "embedded-2-S1-T3" ], [ "arg2", "embedded-2-S1-T5" ] ] ],
        [ "embedded-2-S1-R5", "punct", [ [ "arg1", "embedded-2-S1-T3" ], [ "arg2", "embedded-2-S1-T6" ] ] ]
    ],
    "comments": [ [ "embedded-2-S1-T1", "AnnotatorNotes", "Lemma: куче" ], [ "embedded-2-S1-T2", "AnnotatorNotes", "Lemma: се" ], [ "embedded-2-S1-T3", "AnnotatorNotes", "Lemma: преследвам" ], [ "embedded-2-S1-T4", "AnnotatorNotes", "Lemma: от" ], [ "embedded-2-S1-T5", "AnnotatorNotes", "Lemma: котка" ], [ "embedded-2-S1-T6", "AnnotatorNotes", "Lemma: ." ] ],
    "styles": [ [ [ "embedded-2-S1-T3", "embedded-2-S1-T1", "nsubj:pass" ], "color", "blue" ], [ [ "embedded-2-S1-T3", "embedded-2-S1-T5", "obl" ], "color", "blue" ] ],
    "sentlabels": [ "2" ],
    "text": "Кучето се преследваше от котката .",
    "error": false
}

# visual-style 3 1 nsubj:pass	color:blue
# visual-style 3 4 obl	color:blue
1	Pes	pes	NOUN	_	Animacy=Anim|Case=Nom|Gender=Masc|Number=Sing	3	nsubj:pass	_	_
2	byl	být	AUX	_	Aspect=Imp|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act	3	aux:pass	_	_
3	honěn	honit	VERB	_	Aspect=Imp|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Pass	0	root	_	_
4	kočkou	kočka	NOUN	_	Case=Ins|Gender=Fem|Number=Sing	3	obl	_	_
5	.	.	PUNCT	_	_	3	punct	_	_

# visual-style 2 1 nsubj:pass	color:blue
# visual-style 2 4 obl	color:blue
1	Hunden	hund	NOUN	_	Definite=Def	2	nsubj:pass	_	_
2	jagades	jaga	VERB	_	Tense=Past|Voice=Pass	0	root	_	_
3	av	av	ADP	_	_	4	case	_	_
4	katten	katt	NOUN	_	Definite=Def	2	obl	_	_
5	.	.	PUNCT	_	_	2	punct	_	_

What is needed for UD to be successful?

The secret to understanding the design and current success of UD is to realize that the design is a very subtle compromise between approximately 6 things:

UD needs to be satisfactory on linguistic analysis grounds for individual languages.
UD needs to be good for linguistic typology, i.e., providing a suitable basis for bringing out cross-linguistic parallelism across languages and language families.
UD must be suitable for rapid, consistent annotation by a human annotator.
UD must be easily comprehended and used by a non-linguist, whether a language learner or an engineer with prosaic needs for language processing. We refer to this as seeking a habitable design, and it leads us to favor traditional grammar notions and terminology.
UD must be suitable for computer parsing with high accuracy.
UD must provide good support for downstream language understanding tasks (relation extraction, reading comprehension, machine translation, …).

It’s easy to come up with a proposal that improves UD on one of these dimensions. The interesting and difficult part is to improve UD while remaining sensitive to all these dimensions.

History

The Stanford dependencies were originally developed in 2005 as a backend to the Stanford parser to help in Recognizing Textual Entailment systems, then eventually emerged as the de facto standard for dependency analysis of English, and have since been adapted to a number of different languages (Chang et al., 2009, Bosco et al., 2013, Haverinen et al., 2013, Seraji et al., 2013, Tsarfaty, 2013, Lipenkova and Souček, 2014). The Google universal tag set grew out of the cross-linguistic error analysis based on the CoNLL-X shared task data by McDonald and Nivre (2007), was initially used for unsupervised part-of-speech tagging by Das and Petrov (2011), and has since been adopted as a widely used standard for mapping diverse tagsets to a common standard. The Interset (Zeman, 2008) started as a tool for conversion between morphosyntactic tagsets of multiple languages. It dates back to 2006 when it was used in the first experiments with cross-lingual delexicalized parser adaptation (Zeman and Resnik, 2008). It was later employed as the morphological layer in HamleDT (Zeman et al., 2014) – a project that brings treebanks of many languages under a common annotation scheme.

The first attempt to combine Stanford dependencies and Google universal tags into a universal annotation scheme was the Universal Dependency Treebank (UDT) project (McDonald et al., 2013), which released treebanks for 6 languages in 2013 and 11 languages in 2014, and the first proposal for incorporating morphology was made by Tsarfaty (2013). The second version of HamleDT (Rosa et al., 2014) provided Stanford/Google annotation for 30 languages in 2014. This was followed by the development of universal Stanford dependencies (USD) (de Marneffe et al., 2014). The new Universal Dependencies is the result of merging all these initiatives into a single coherent framework, based on universal Stanford dependencies, an extended version of the Google universal tagset, a revised subset of the Interset feature inventory, and a revised version of the CoNLL-X format (called CoNLL-U).

The first version of the new guidelines, released in October 2014, introduced a somewhat extended universal part-of-speech tag set. This set makes some distinctions that were missing in the original proposal, but were perceived to be of importance by many, and clarifies the definition of categories. As a result of this work, universal POS categories have substantive definitions and are not necessarily just equivalence classes of categories in underlying language-particular treebanks. Hence, work to convert to UD POS tags often requires context-sensitive rules, or some hand correction. The UD morphological features aim to provide a stripped-down basic set of features which are most crucial for analysis and are widespread across languages. The dependency representation of UD evolves out of Stanford Dependencies (SD), which itself follows ideas of grammatical relations-focused description that can be found in many linguistic frameworks. That is, it is centrally organized around notions of subject, object, clausal complement, noun determiner, noun modifier, etc. The goal of the new universal version was to add or refine relations to better accommodate the grammatical structures of typologically different languages and to clean up some of the quirkier and more English-specific features of the original version. Hence, the new taxonomy has fewer relations than the original SD.

Project organization

UD is an open collaboration with many project members. The administrative structure is kept to a minimum and currently consists of the following:

The project is coordinated by Joakim Nivre (aka chief cat herder).
Releases (including validation and documentation) are managed by Filip Ginter, Sampo Pyysalo and Dan Zeman.
Universal guidelines are managed by a small group of core members, currently consisting of Marie de Marneffe, Filip Ginter, Yoav Goldberg, Jan Hajič, Chris Manning, Ryan McDonald, Lori Levin, Joakim Nivre, Slav Petrov, Sampo Pyysalo, Nathan Schneider, Sebastian Schuster, Natalia Silveira, Reut Tsarfaty, Fran Tyers, Amir Zeldes and Dan Zeman.
Language-specific guidelines and treebanks are maintained by each specific language team.
Issues are raised on GitHub and resolved through discussion and voting.

List of contributors

2021

Marie-Catherine de Marneffe, Christopher Manning, Joakim Nivre, Daniel Zeman (2021): Universal Dependencies. In: Computational Linguistics, ISSN 1530-9312, vol. 47, no. 2, pp. 255-308.

2020

Proceedings of the Fourth Workshop on Universal Dependencies, UDW 2020, COLING, on-line.
Joakim Nivre, Marie-Catherine de Marneffe, Filip Ginter, Jan Hajič, Christopher Manning, Sampo Pyysalo, Sebastian Schuster, Francis Tyers, Daniel Zeman. 2020. Universal Dependencies v2: An Evergrowing Multilingual Treebank Collection. In Proceedings of the 12th International Conference on Language Resources and Evaluation (LREC 2020), pp. 4034-4043, European Language Resources Association, Marseille, France, ISBN 979-10-95546-34-4.
Ika Alfina, Daniel Zeman, Arawinda Dinakaramani, Indra Budi, Heru Suhartanto (2020): Selecting the Universal Dependencies Morphological Features for Indonesian Dependency Treebank. In: Proceedings of the International Conference on Asian Language Processing (IALP 2020), pp. 104-109, Chinese and Oriental Languages Information Processing Society, Kuala Lumpur, Malaysia, ISBN 978-1-7281-7689-5.
Gosse Bouma, Djamé Seddah, Daniel Zeman (2020): Overview of the IWPT 2020 Shared Task on Parsing into Enhanced Universal Dependencies. In: Proceedings of the 16th International Conference on Parsing Technologies and the IWPT 2020 Shared Task on Parsing into Enhanced Universal Dependencies, pp. 151-161, Association for Computational Linguistics, Stroudsburg, PA, USA, ISBN 978-1-952148-11-8.
Ọlájídé Ishola, Daniel Zeman (2020): Yorùbá Dependency Treebank (YTB). In: Proceedings of the 12th International Conference on Language Resources and Evaluation (LREC 2020), pp. 5180-5188, European Language Resources Association, Marseille, France, ISBN 979-10-95546-34-4.
Atul Kumar Ojha, Daniel Zeman (2020): Universal Dependency Treebanks for Low-Resource Indian Languages: The Case of Bhojpuri. In: Proceedings of the LREC 2020 WILDRE5 – 5th Workshop on Indian Language Data: Resources and Evaluation, pp. 33-38, European Language Resources Association, Paris, France, ISBN 979-10-95546-67-2.
Marsida Toska, Joakim Nivre, Daniel Zeman (2020): Universal Dependencies for Albanian. In: Proceedings of the Fourth Workshop on Universal Dependencies (UDW 2020), pp. 178-188, Association for Computational Linguistics, Stroudsburg, PA, USA, ISBN 978-1-952148-48-4.

2019

Proceedings of the Third Workshop on Universal Dependencies, UDW 2019, SyntaxFest, Paris.
Kira Droganova, Daniel Zeman. 2019. Towards Deep Universal Dependencies. In Proceedings of the Fifth International Conference on Dependency Linguistics (Depling, Syntaxfest 2019), pp. 144-152.
Kim Gerdes, Bruno Guillaume, Sylvain Kahane, Guy Perrier. 2019. Improving Surface-syntactic Universal Dependencies (SUD): surface-syntactic functions and deep-syntactic features, Proceedings of the 17th international conference on Treebanks and Linguistic Theories (TLT), SyntaxFest, Paris.
Anssi Yli-Jyrä. 2019. Transition-Based Coding and Formal Language Theory for Ordered Digraphs, Proceedings of the 14th International Conference on Finite-State Methods and Natural Language Processing (FSMNLP, Dresden, Germany, 23–25 September 2019), pp. 118–131. SIGFSM, Association for Computational Linguistics.
Anssi Yli-Jyrä. 2019. How to embed noncrossing trees in Universal Dependencies treebanks in a low-complexity regular language. Journal of Language Modelling, 7(2):177-232.

2018

Proceedings of the Second Workshop on Universal Dependencies, UDW 2018, EMNLP, Brussels.
Puneet Dwivedi, Daniel Zeman. 2018. The Forest Lion and the Bull: Morphosyntactic Annotation of the Panchatantra. In: Computación y Sistemas, ISSN 1405-5546, vol. 22, no. 4, pp. 1377-1384.
N. Gruzitis, L. Pretkalnina, B. Saulite, L. Rituma, G. Nespore-Berzkalne, A. Znotins and P. Paikens. 2018. Creation of a Balanced State-of-the-Art Multilayer Corpus for NLU. In Proceedings of the 11th International Conference on Language Resources and Evaluation (LREC), pp. 4506-4513.
Sonja Marković, Daniel Zeman. 2018. Reflexives in Universal Dependencies. In Proceedings of the 17th International Workshop on Treebanks and Linguistic Theories (TLT 2018), pp. 131-146.
Agnieszka Patejuk and Adam Przepiórkowski. 2018. From Lexical Functional Grammar to Enhanced Universal Dependencies: Linguistically informed treebanks of Polish. Institute of Computer Science, Polish Academy of Sciences, Warsaw. (263 pages)
Lauma Pretkalniņa, Laura Rituma and Baiba Saulīte. 2018. Deriving Enhanced Universal Dependencies from a Hybrid Dependency-Constituency Treebank. In Text, Speech, and Dialogue, vol. 11107, pp. 95-105.
Adam Przepiórkowski and Agnieszka Patejuk. 2018. Arguments and adjuncts in Universal Dependencies. In Proceedings of the 27th International Conference on Computational Linguistics (COLING 2018), pages 3837–3852, Santa Fe, NM.
Sylvain Kahane, Marine Courtin, Kim Gerdes. 2018. Multi-word annotation in syntactic treebanks: Propositions for Universal Dependencies, Proceedings of the 16th international conference on Treebanks and Linguistic Theories (TLT), Prague.
Daniel Zeman. 2018. The World of Tokens, Tags and Trees. ISBN 978-80-88132-09-7.
Daniel Zeman, Jan Hajič, Martin Popel, Martin Potthast, Milan Straka, Filip Ginter, Joakim Nivre, Slav Petrov. 2018. CoNLL 2018 Shared Task: Multilingual Parsing from Raw Text to Universal Dependencies. In Proceedings of the CoNLL 2018 Shared Task: Multilingual Parsing from Raw Text to Universal Dependencies, pp. 1-21.

2017

Proceedings of the First Workshop on Universal Dependencies, UDW 2017, NoDaLiDa, Gothenburg.
Teresa Lynn, Jennifer Foster and Mark Dras. 2017. Morphological Features of the Irish Universal Dependency Treebank. In Proceedings of the 15th International Workshop on Treebanks and Linguistic Theories (TLT 2017), pp. 111-122.
Joakim Nivre, Daniel Zeman, Filip Ginter, Francis Tyers. 2017. EACL tutorial on Universal Dependencies.
Tanja Samardžić, Mirjana Starović, Željko Agić, Nikola Ljubešić. 2017. Universal Dependencies for Serbian in Comparison with Croatian and Other Slavic Languages. In: Proceedings of BSNLP 2017, Valencia, Spain.
Dima Taji, Nizar Habash, Daniel Zeman. 2017. Universal Dependencies for Arabic. In Proceedings of the Third Arabic Natural Language Processing Workshop (WANLP), pp. 166-176.
Daniel Zeman. 2017. Core Arguments in Universal Dependencies. In Proceedings of the Fourth International Conference on Dependency Linguistics (Depling 2017).
Daniel Zeman. 2017. Slovak Dependency Treebank in Universal Dependencies. In Jazykovedný časopis / Journal of Linguistics, ISSN 0021-5597, vol. 68, no. 2, pp. 385-395.
Daniel Zeman, Martin Popel, Milan Straka, Jan Hajič, Joakim Nivre, Filip Ginter, Juhani Luotolahti, Sampo Pyysalo, Slav Petrov, Martin Potthast, Francis Tyers, Elena Badmaeva, Memduh Gökırmak, Anna Nedoluzhko, Silvie Cinková, Jan Hajič, jr., Jaroslava Hlaváčová, Václava Kettnerová, Zdeňka Urešová, Jenna Kanerva, Stina Ojala, Anna Missilä, Christopher Manning, Sebastian Schuster, Siva Reddy, Dima Taji, Nizar Habash, Herman Leung, Marie-Catherine de Marneffe, Manuela Sanguinetti, Maria Simi, Hiroshi Kanayama, Valeria de Paiva, Kira Droganova, Héctor Martínez Alonso, Çağrı Çöltekin, Umut Sulubacak, Hans Uszkoreit, Vivien Macketanz, Aljoscha Burchardt, Kim Harris, Katrin Marheinecke, Georg Rehm, Tolga Kayadelen, Mohammed Attia, Ali Elkahky, Zhuoran Yu, Emily Pitler, Saran Lertpradit, Michael Mandl, Jesse Kirchner, Hector Fernandez Alcalde, Jana Strnadová, Esha Banerjee, Ruli Manurung, Antonio Stella, Atsuko Shimada, Sookyoung Kwak, Gustavo Mendonça, Tatiana Lando, Rattima Nitisaroj, Josie Li. 2017. CoNLL 2017 Shared Task: Multilingual Parsing from Raw Text to Universal Dependencies. In Proceedings of the CoNLL 2017 Shared Task: Multilingual Parsing from Raw Text to Universal Dependencies, pp. 1-19.
Alexandre Rademaker, Fabricio Chalub, Livy Real, Cláudia Freitas, Eckhard Bick, and Valeria de Paiva. 2017. Universal Dependencies for Portuguese. In Proceedings of the Fourth International Conference on Dependency Linguistics (Depling), pp. 197–206. Pisa, Italy.
Anssi Yli-Jyrä. 2017. Bounded-Depth High-Coverage Search Space for Noncrossing Parses. In Proceedings of the 13th International Conference on Finite State Methods and Natural Language Processing (FSMNLP 2017, Umeå, Sweden, 4-6 September 2017), pp. 30-40. SIGFSM, Association for Computational Linguistics.

2016

Teresa Lynn and Jennifer Foster. 2016. Universal Dependencies for Irish. In Proceedings of the Second Celtic Language Technology Workshop (CLTW 2016), pp. 79-92.
Héctor Martínez Alonso, Daniel Zeman. 2016. Universal Dependencies for the AnCora treebanks. In Procesamiento del Lenguaje Natural, ISSN 1135-5948, 57, pp. 91-98.
Joakim Nivre, Marie-Catherine de Marneffe, Filip Ginter, Yoav Goldberg, Jan Hajič, Christopher D. Manning, Ryan McDonald, Slav Petrov, Sampo Pyysalo, Natalia Silveira, Reut Tsarfaty, Daniel Zeman. 2016. Universal Dependencies v1: A Multilingual Treebank Collection. In Proceedings of LREC.
Sebastian Schuster, Christopher D. Manning. 2016. Enhanced English Universal Dependencies: An Improved Representation for Natural Language Understanding Tasks. In Proceedings of LREC.
Mojgan Seraji, Filip Ginter, Joakim Nivre. 2016. Universal Dependencies for Persian. In Proceedings of LREC, pages 2361-2365.
Daniel Zeman. 2016. Universal Annotation of Slavic Verb Forms. In The Prague Bulletin of Mathematical Linguistics, ISSN 0032-6585, 105, pp. 143-193.

2015

Željko Agić, Nikola Ljubešić. 2015. Universal Dependencies for Croatian (that Work for Serbian, too), In: Proceedings of BSNLP 2015, Hissar, Bulgaria.
Kim Gerdes, Sylvain Kahane. 2015. Non-constituent coordination and other coordinative constructions as dependency graphs, Proceedings of the 3rd international conference on Dependency Linguistics (Depling), Uppsala.
Joakim Nivre. 2015. Towards a Universal Grammar for Natural Language Processing. Computational Linguistics and Intelligent Text Processing.
Petya Osenova and Kiril Simov. 2015. Universalizing BulTreeBank: a Linguistic Tale about Glocalization. In: Proceedings of BSNLP 2015, Hissar, Bulgaria, pp. 81–89.
Sampo Pyysalo, Jenna Kanerva, Anna Missilä, Veronika Laippala, and Filip Ginter. 2015. Universal Dependencies for Finnish. In Proceedings of Nodalida 2015.
Daniel Zeman. 2015. Slavic Languages in Universal Dependencies. In Slovko 2015: Natural Language Processing, Corpus Linguistics, E-learning. Bratislava, Slovakia. PDF

2014

Joakim Nivre. 2014. Universal Dependencies for Swedish. In SLTC 2014.
Rudolf Rosa, Jan Mašek, David Mareček, Martin Popel, Daniel Zeman, Zdeněk Žabokrtský. 2014. HamleDT 2.0: Thirty Dependency Treebanks Stanfordized. In Proceedings of LREC. (home page)
Daniel Zeman, Ondřej Dušek, David Mareček, Martin Popel, Loganathan Ramasamy, Jan Štěpánek, Zdeněk Žabokrtský, and Jan Hajič. 2014. HamleDT: Harmonized multi-language dependency treebank. In Language Resources and Evaluation, DOI 10.1007/s10579-014-9275-2. (Extended version of paper from LREC 2012.)

2013 and before

Cristina Bosco, Simonetta Montemagni, Maria Simi. 2013. Converting Italian treebanks: Towards an Italian Stanford dependency treebank. In 7th Linguistic Annotation Workshop and Interoperability with Discourse.
Pi-Chuan Chang, Huihsin Tseng, Dan Jurafsky, and Christopher D. Manning. 2009. Discriminative Reordering with Chinese Grammatical Relations Features. In Proceedings of the Third Workshop on Syntax and Structure in Statistical Translation.
Dipanjan Das and Slav Petrov. 2011. Unsupervised part-of-speech tagging with bilingual graph-based projections. In Proceedings of ACL.
Katri Haverinen, Jenna Nyblom, Timo Viljanen, Veronika Laippala, Samuel Kohonen, Anna Missilä, Stina Ojala, Tapio Salakoski, and Filip Ginter. 2013. Building the essential resources for Finnish: the Turku Dependency Treebank. Language Resources and Evaluation. Volume 48, Issue 3, pp 493-531.
Janna Lipenkova and Milan Souček. 2014. Converting Russian Dependency Treebank to Stanford Typed Dependencies Representation. In Proceedings of the 14th Conference of the European Chapter of the Association for Computational Linguistics, pp. 143-147.
Teresa Lynn, Jennifer Foster, Mark Dras and Lamia Tounsi. 2014. Cross-lingual Transfer Parsing for Low-Resourced Languages: An Irish Case Study. In Proceedings of the First Celtic Language Technology Workshop (CLTW 2014).
Marie-Catherine de Marneffe, Miriam Connor, Natalia Silveira, Samuel R. Bowman, Timothy Dozat, and Christopher D. Manning. 2013. More constructions, more genres: extending Stanford Dependencies. In Proceedings of the Second International Conference on Dependency Linguistics (DepLing 2013).
Marie-Catherine de Marneffe, Timothy Dozat, Natalia Silveira, Katri Haverinen, Filip Ginter, Joakim Nivre, and Christopher D. Manning. 2014. Universal Stanford Dependencies: A cross-linguistic typology. In Proceedings of LREC.
Marie-Catherine de Marneffe, Bill MacCartney, and Christopher D. Manning. 2006. Generating typed dependency parses from phrase structure parses. In Proceedings of LREC.
Marie-Catherine de Marneffe and Christopher D. Manning. 2008. The Stanford typed dependencies representation. In COLING Workshop on Cross-framework and Cross-domain Parser Evaluation.
Ryan McDonald, and Joakim Nivre. 2007. Characterizing the errors of data-driven dependency parsing models. In Proceedings of EMNLP-CoNLL.
Ryan McDonald, Joakim Nivre, Yvonne Quirmbach-Brundage, Yoav Goldberg, Dipanjan Das, Kuzman Ganchev, Keith Hall, Slav Petrov, Hao Zhang, Oscar Täckström, Claudia Bedini, Núria Bertomeu Castelló, and Jungmee Lee. 2013. Universal Dependency Annotation for Multilingual Parsing. In Proceedings of ACL. (home page)
Slav Petrov, Dipanjan Das, and Ryan McDonald. 2012. A universal part-of-speech tagset. In Proceedings of LREC. (home page)
Mojgan Seraji, Carina Jahani, Beáta Megyesi, and Joakim Nivre. 2013. A Persian treebank with Stanford typed dependencies. In Proceedings of LREC.
Pavel Straňák, Jan Štěpánek. 2010. Representing Layered and Structured Data in the CoNLL-ST Format. In Proceedings of ICGL 2010.
Reut Tsarfaty. 2013. A unified morpho-syntactic scheme of Stanford dependencies. In Proceedings of ACL.
Daniel Zeman. 2008. Reusable Tagset Conversion Using Tagset Drivers. In Proceedings of LREC. (home page)
Daniel Zeman, and Philip Resnik. 2008. Cross-Language Parser Adaptation between Related Languages. In Proceedings of IJCNLP 2008 Workshop on NLP for Less Privileged Languages