id
stringlengths
1
5
tokens
sequence
pos_tags
sequence
chunk_tags
sequence
ner_tags
sequence
0
[ "EU", "rejects", "German", "call", "to", "boycott", "British", "lamb", "." ]
[ 22, 42, 16, 21, 35, 37, 16, 21, 7 ]
[ 11, 21, 11, 12, 21, 22, 11, 12, 0 ]
[ 3, 0, 7, 0, 0, 0, 7, 0, 0 ]
1
[ "Peter", "Blackburn" ]
[ 22, 22 ]
[ 11, 12 ]
[ 1, 2 ]
2
[ "BRUSSELS", "1996-08-22" ]
[ 22, 11 ]
[ 11, 12 ]
[ 5, 0 ]
3
[ "The", "European", "Commission", "said", "on", "Thursday", "it", "disagreed", "with", "German", "advice", "to", "consumers", "to", "shun", "British", "lamb", "until", "scientists", "determine", "whether", "mad", "cow", "disease", "can", "be", "transmitted", "to", "sheep", "." ]
[ 12, 22, 22, 38, 15, 22, 28, 38, 15, 16, 21, 35, 24, 35, 37, 16, 21, 15, 24, 41, 15, 16, 21, 21, 20, 37, 40, 35, 21, 7 ]
[ 11, 12, 12, 21, 13, 11, 11, 21, 13, 11, 12, 13, 11, 21, 22, 11, 12, 17, 11, 21, 17, 11, 12, 12, 21, 22, 22, 13, 11, 0 ]
[ 0, 3, 4, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]
4
[ "Germany", "'s", "representative", "to", "the", "European", "Union", "'s", "veterinary", "committee", "Werner", "Zwingmann", "said", "on", "Wednesday", "consumers", "should", "buy", "sheepmeat", "from", "countries", "other", "than", "Britain", "until", "the", "scientific", "advice", "was", "clearer", "." ]
[ 22, 27, 21, 35, 12, 22, 22, 27, 16, 21, 22, 22, 38, 15, 22, 24, 20, 37, 21, 15, 24, 16, 15, 22, 15, 12, 16, 21, 38, 17, 7 ]
[ 11, 11, 12, 13, 11, 12, 12, 11, 12, 12, 12, 12, 21, 13, 11, 12, 21, 22, 11, 13, 11, 1, 13, 11, 17, 11, 12, 12, 21, 1, 0 ]
[ 5, 0, 0, 0, 0, 3, 4, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0 ]
5
[ "\"", "We", "do", "n't", "support", "any", "such", "recommendation", "because", "we", "do", "n't", "see", "any", "grounds", "for", "it", ",", "\"", "the", "Commission", "'s", "chief", "spokesman", "Nikolaus", "van", "der", "Pas", "told", "a", "news", "briefing", "." ]
[ 0, 28, 41, 30, 37, 12, 16, 21, 15, 28, 41, 30, 37, 12, 24, 15, 28, 6, 0, 12, 22, 27, 16, 21, 22, 22, 14, 22, 38, 12, 21, 21, 7 ]
[ 0, 11, 21, 22, 22, 11, 12, 12, 17, 11, 21, 22, 22, 11, 12, 13, 11, 0, 0, 11, 12, 11, 12, 12, 12, 12, 12, 12, 21, 11, 12, 12, 0 ]
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 1, 2, 2, 2, 0, 0, 0, 0, 0 ]
6
[ "He", "said", "further", "scientific", "study", "was", "required", "and", "if", "it", "was", "found", "that", "action", "was", "needed", "it", "should", "be", "taken", "by", "the", "European", "Union", "." ]
[ 28, 38, 16, 16, 21, 38, 40, 10, 15, 28, 38, 40, 15, 21, 38, 40, 28, 20, 37, 40, 15, 12, 22, 22, 7 ]
[ 11, 21, 11, 12, 12, 21, 22, 0, 17, 11, 21, 22, 17, 11, 21, 22, 11, 21, 22, 22, 13, 11, 12, 12, 0 ]
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 4, 0 ]
7
[ "He", "said", "a", "proposal", "last", "month", "by", "EU", "Farm", "Commissioner", "Franz", "Fischler", "to", "ban", "sheep", "brains", ",", "spleens", "and", "spinal", "cords", "from", "the", "human", "and", "animal", "food", "chains", "was", "a", "highly", "specific", "and", "precautionary", "move", "to", "protect", "human", "health", "." ]
[ 28, 38, 12, 21, 16, 21, 15, 22, 22, 22, 22, 22, 35, 37, 21, 24, 6, 24, 10, 16, 24, 15, 12, 21, 10, 21, 21, 24, 38, 12, 30, 16, 10, 16, 21, 35, 37, 16, 21, 7 ]
[ 11, 21, 11, 12, 11, 12, 13, 11, 12, 12, 12, 12, 21, 22, 11, 12, 0, 11, 0, 11, 12, 13, 11, 12, 12, 12, 12, 12, 21, 11, 12, 1, 2, 2, 11, 21, 22, 11, 12, 0 ]
[ 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]
8
[ "Fischler", "proposed", "EU-wide", "measures", "after", "reports", "from", "Britain", "and", "France", "that", "under", "laboratory", "conditions", "sheep", "could", "contract", "Bovine", "Spongiform", "Encephalopathy", "(", "BSE", ")", "--", "mad", "cow", "disease", "." ]
[ 17, 40, 22, 42, 15, 24, 15, 22, 10, 22, 43, 15, 21, 24, 21, 20, 37, 22, 22, 22, 4, 22, 5, 8, 16, 21, 21, 7 ]
[ 11, 12, 12, 21, 13, 11, 13, 11, 12, 12, 11, 13, 11, 11, 12, 21, 22, 11, 12, 12, 0, 11, 0, 0, 11, 12, 12, 0 ]
[ 1, 0, 7, 0, 0, 0, 0, 5, 0, 5, 0, 0, 0, 0, 0, 0, 0, 7, 8, 8, 0, 7, 0, 0, 0, 0, 0, 0 ]
9
[ "But", "Fischler", "agreed", "to", "review", "his", "proposal", "after", "the", "EU", "'s", "standing", "veterinary", "committee", ",", "mational", "animal", "health", "officials", ",", "questioned", "if", "such", "action", "was", "justified", "as", "there", "was", "only", "a", "slight", "risk", "to", "human", "health", "." ]
[ 10, 22, 38, 35, 37, 29, 21, 15, 12, 22, 27, 21, 16, 21, 6, 16, 21, 21, 24, 6, 38, 15, 16, 21, 38, 40, 15, 30, 38, 30, 12, 16, 21, 35, 16, 21, 7 ]
[ 0, 11, 21, 22, 22, 11, 12, 13, 11, 12, 11, 12, 12, 12, 0, 11, 12, 12, 12, 0, 21, 17, 11, 12, 21, 22, 13, 3, 21, 3, 11, 12, 12, 13, 11, 12, 0 ]
[ 0, 1, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]
10
[ "Spanish", "Farm", "Minister", "Loyola", "de", "Palacio", "had", "earlier", "accused", "Fischler", "at", "an", "EU", "farm", "ministers", "'", "meeting", "of", "causing", "unjustified", "alarm", "through", "\"", "dangerous", "generalisation", ".", "\"" ]
[ 22, 22, 22, 22, 22, 22, 38, 31, 40, 22, 15, 12, 16, 21, 24, 27, 21, 15, 39, 16, 21, 15, 0, 16, 21, 7, 0 ]
[ 11, 12, 12, 12, 12, 12, 21, 22, 22, 11, 13, 11, 12, 12, 12, 11, 12, 13, 21, 1, 11, 13, 0, 11, 12, 0, 0 ]
[ 7, 0, 0, 1, 2, 2, 0, 0, 0, 1, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]
11
[ "." ]
[ 7 ]
[ 0 ]
[ 0 ]
12
[ "Only", "France", "and", "Britain", "backed", "Fischler", "'s", "proposal", "." ]
[ 30, 22, 10, 22, 38, 22, 27, 21, 7 ]
[ 11, 12, 12, 12, 21, 11, 11, 12, 0 ]
[ 0, 5, 0, 5, 0, 1, 0, 0, 0 ]
13
[ "The", "EU", "'s", "scientific", "veterinary", "and", "multidisciplinary", "committees", "are", "due", "to", "re-examine", "the", "issue", "early", "next", "month", "and", "make", "recommendations", "to", "the", "senior", "veterinary", "officials", "." ]
[ 12, 22, 27, 16, 16, 10, 16, 24, 41, 16, 35, 37, 12, 21, 30, 16, 21, 10, 37, 24, 35, 12, 16, 16, 24, 7 ]
[ 11, 12, 11, 12, 12, 12, 12, 12, 21, 1, 21, 22, 11, 12, 11, 12, 12, 0, 21, 11, 13, 11, 12, 12, 12, 0 ]
[ 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]
14
[ "Sheep", "have", "long", "been", "known", "to", "contract", "scrapie", ",", "a", "brain-wasting", "disease", "similar", "to", "BSE", "which", "is", "believed", "to", "have", "been", "transferred", "to", "cattle", "through", "feed", "containing", "animal", "waste", "." ]
[ 22, 41, 30, 40, 40, 35, 21, 21, 6, 12, 16, 21, 16, 35, 22, 43, 42, 40, 35, 37, 40, 40, 35, 24, 15, 21, 39, 21, 21, 7 ]
[ 11, 21, 22, 22, 22, 13, 11, 12, 0, 11, 12, 12, 1, 13, 11, 11, 21, 22, 22, 22, 22, 22, 13, 11, 13, 11, 21, 11, 12, 0 ]
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]
15
[ "British", "farmers", "denied", "on", "Thursday", "there", "was", "any", "danger", "to", "human", "health", "from", "their", "sheep", ",", "but", "expressed", "concern", "that", "German", "government", "advice", "to", "consumers", "to", "avoid", "British", "lamb", "might", "influence", "consumers", "across", "Europe", "." ]
[ 16, 24, 40, 15, 22, 13, 38, 12, 21, 35, 16, 21, 15, 29, 21, 6, 10, 38, 21, 15, 16, 21, 21, 35, 24, 35, 37, 16, 21, 20, 37, 24, 15, 22, 7 ]
[ 11, 12, 21, 13, 11, 11, 21, 11, 12, 13, 11, 12, 13, 11, 12, 0, 0, 21, 11, 17, 11, 12, 12, 13, 11, 21, 22, 11, 12, 21, 22, 11, 13, 11, 0 ]
[ 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 5, 0 ]
16
[ "\"", "What", "we", "have", "to", "be", "extremely", "careful", "of", "is", "how", "other", "countries", "are", "going", "to", "take", "Germany", "'s", "lead", ",", "\"", "Welsh", "National", "Farmers", "'", "Union", "(", "NFU", ")", "chairman", "John", "Lloyd", "Jones", "said", "on", "BBC", "radio", "." ]
[ 0, 44, 28, 41, 35, 37, 30, 16, 15, 42, 46, 16, 24, 41, 39, 35, 37, 22, 27, 21, 6, 0, 22, 22, 22, 27, 22, 4, 22, 5, 21, 22, 22, 22, 38, 15, 22, 21, 7 ]
[ 0, 11, 11, 21, 22, 22, 1, 2, 13, 21, 3, 11, 12, 21, 22, 22, 22, 11, 11, 12, 0, 0, 11, 12, 12, 11, 12, 0, 11, 0, 11, 12, 12, 12, 21, 13, 11, 12, 0 ]
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 3, 4, 4, 4, 4, 0, 3, 0, 0, 1, 2, 2, 0, 0, 3, 4, 0 ]
17
[ "Bonn", "has", "led", "efforts", "to", "protect", "public", "health", "after", "consumer", "confidence", "collapsed", "in", "March", "after", "a", "British", "report", "suggested", "humans", "could", "contract", "an", "illness", "similar", "to", "mad", "cow", "disease", "by", "eating", "contaminated", "beef", "." ]
[ 22, 42, 40, 24, 35, 37, 16, 21, 15, 21, 21, 38, 15, 22, 15, 12, 16, 21, 38, 24, 20, 37, 12, 21, 16, 35, 16, 21, 21, 15, 39, 40, 21, 7 ]
[ 11, 21, 22, 11, 21, 22, 11, 12, 13, 11, 12, 21, 13, 11, 13, 11, 12, 12, 21, 11, 21, 22, 11, 12, 1, 2, 2, 11, 12, 13, 21, 22, 11, 0 ]
[ 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]
18
[ "Germany", "imported", "47,600", "sheep", "from", "Britain", "last", "year", ",", "nearly", "half", "of", "total", "imports", "." ]
[ 22, 38, 11, 21, 15, 22, 16, 21, 6, 30, 21, 15, 16, 24, 7 ]
[ 11, 21, 11, 12, 13, 11, 11, 12, 0, 11, 12, 13, 11, 12, 0 ]
[ 5, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]
19
[ "It", "brought", "in", "4,275", "tonnes", "of", "British", "mutton", ",", "some", "10", "percent", "of", "overall", "imports", "." ]
[ 28, 38, 15, 11, 24, 15, 16, 21, 6, 12, 11, 21, 15, 16, 24, 7 ]
[ 11, 21, 13, 11, 12, 13, 11, 12, 0, 11, 12, 12, 13, 11, 12, 0 ]
[ 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]
20
[ "Rare", "Hendrix", "song", "draft", "sells", "for", "almost", "$", "17,000", "." ]
[ 22, 22, 21, 21, 42, 15, 30, 3, 11, 7 ]
[ 11, 12, 12, 12, 21, 13, 11, 12, 12, 0 ]
[ 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 ]
21
[ "LONDON", "1996-08-22" ]
[ 22, 11 ]
[ 11, 12 ]
[ 5, 0 ]
22
[ "A", "rare", "early", "handwritten", "draft", "of", "a", "song", "by", "U.S.", "guitar", "legend", "Jimi", "Hendrix", "was", "sold", "for", "almost", "$", "17,000", "on", "Thursday", "at", "an", "auction", "of", "some", "of", "the", "late", "musician", "'s", "favourite", "possessions", "." ]
[ 12, 16, 16, 16, 21, 15, 12, 21, 15, 22, 21, 21, 22, 22, 38, 40, 15, 30, 3, 11, 15, 22, 15, 12, 21, 15, 12, 15, 12, 16, 21, 27, 16, 24, 7 ]
[ 11, 12, 12, 12, 12, 13, 11, 12, 13, 11, 12, 12, 12, 12, 21, 22, 13, 11, 12, 12, 13, 11, 13, 11, 12, 13, 11, 13, 11, 12, 12, 11, 12, 12, 0 ]
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]
23
[ "A", "Florida", "restaurant", "paid", "10,925", "pounds", "(", "$", "16,935", ")", "for", "the", "draft", "of", "\"", "Ai", "n't", "no", "telling", "\"", ",", "which", "Hendrix", "penned", "on", "a", "piece", "of", "London", "hotel", "stationery", "in", "late", "1966", "." ]
[ 12, 22, 21, 38, 11, 24, 4, 3, 11, 5, 15, 12, 21, 15, 0, 42, 30, 12, 39, 0, 6, 43, 22, 40, 15, 12, 21, 15, 22, 21, 21, 15, 16, 11, 7 ]
[ 11, 12, 12, 21, 11, 12, 0, 11, 12, 0, 13, 11, 12, 13, 0, 21, 0, 11, 21, 0, 0, 11, 11, 21, 13, 11, 12, 13, 11, 12, 12, 13, 11, 12, 0 ]
[ 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 8, 8, 8, 0, 0, 0, 1, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0 ]
24
[ "At", "the", "end", "of", "a", "January", "1967", "concert", "in", "the", "English", "city", "of", "Nottingham", "he", "threw", "the", "sheet", "of", "paper", "into", "the", "audience", ",", "where", "it", "was", "retrieved", "by", "a", "fan", "." ]
[ 15, 12, 21, 15, 12, 22, 11, 21, 15, 12, 16, 21, 15, 22, 28, 38, 12, 21, 15, 21, 15, 12, 21, 6, 46, 28, 38, 40, 15, 12, 21, 7 ]
[ 13, 11, 12, 13, 11, 12, 12, 12, 13, 11, 12, 12, 13, 11, 11, 21, 11, 12, 13, 11, 13, 11, 12, 0, 3, 11, 21, 22, 13, 11, 12, 0 ]
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]
25
[ "Buyers", "also", "snapped", "up", "16", "other", "items", "that", "were", "put", "up", "for", "auction", "by", "Hendrix", "'s", "former", "girlfriend", "Kathy", "Etchingham", ",", "who", "lived", "with", "him", "from", "1966", "to", "1969", "." ]
[ 24, 30, 38, 30, 11, 16, 24, 43, 38, 40, 33, 15, 21, 15, 22, 27, 16, 21, 22, 22, 6, 44, 38, 15, 28, 15, 11, 35, 11, 7 ]
[ 11, 3, 21, 3, 11, 12, 12, 11, 21, 22, 15, 13, 11, 13, 11, 11, 12, 12, 12, 12, 0, 11, 21, 13, 11, 13, 11, 13, 11, 0 ]
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]
26
[ "They", "included", "a", "black", "lacquer", "and", "mother", "of", "pearl", "inlaid", "box", "used", "by", "Hendrix", "to", "store", "his", "drugs", ",", "which", "an", "anonymous", "Australian", "purchaser", "bought", "for", "5,060", "pounds", "(", "$", "7,845", ")", "." ]
[ 28, 38, 12, 16, 21, 10, 21, 15, 21, 38, 21, 40, 15, 22, 35, 37, 29, 24, 6, 43, 12, 16, 16, 21, 38, 15, 11, 24, 4, 3, 11, 5, 7 ]
[ 11, 21, 11, 12, 12, 12, 12, 13, 11, 21, 11, 21, 13, 11, 21, 22, 11, 12, 0, 11, 11, 12, 12, 12, 21, 13, 11, 12, 0, 11, 12, 0, 0 ]
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]
27
[ "The", "guitarist", "died", "of", "a", "drugs", "overdose", "in", "1970", "aged", "27", "." ]
[ 12, 21, 38, 15, 12, 24, 21, 15, 11, 40, 11, 7 ]
[ 11, 12, 21, 13, 11, 12, 12, 13, 11, 1, 11, 0 ]
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]
28
[ "China", "says", "Taiwan", "spoils", "atmosphere", "for", "talks", "." ]
[ 22, 42, 22, 42, 21, 15, 24, 7 ]
[ 11, 21, 11, 21, 11, 13, 11, 0 ]
[ 5, 0, 5, 0, 0, 0, 0, 0 ]
29
[ "BEIJING", "1996-08-22" ]
[ 39, 11 ]
[ 21, 11 ]
[ 5, 0 ]
30
[ "China", "on", "Thursday", "accused", "Taipei", "of", "spoiling", "the", "atmosphere", "for", "a", "resumption", "of", "talks", "across", "the", "Taiwan", "Strait", "with", "a", "visit", "to", "Ukraine", "by", "Taiwanese", "Vice", "President", "Lien", "Chan", "this", "week", "that", "infuriated", "Beijing", "." ]
[ 22, 15, 22, 38, 22, 15, 39, 12, 21, 15, 12, 21, 15, 24, 15, 12, 22, 22, 15, 12, 21, 35, 22, 15, 22, 22, 22, 22, 22, 12, 21, 43, 38, 22, 7 ]
[ 11, 13, 11, 21, 11, 13, 21, 11, 12, 13, 11, 12, 13, 11, 13, 11, 12, 12, 13, 11, 12, 13, 11, 13, 11, 12, 12, 12, 12, 11, 12, 11, 21, 11, 0 ]
[ 5, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 0, 0, 0, 0, 5, 0, 7, 0, 0, 1, 2, 0, 0, 0, 0, 5, 0 ]
31
[ "Speaking", "only", "hours", "after", "Chinese", "state", "media", "said", "the", "time", "was", "right", "to", "engage", "in", "political", "talks", "with", "Taiwan", ",", "Foreign", "Ministry", "spokesman", "Shen", "Guofang", "told", "Reuters", ":", "\"", "The", "necessary", "atmosphere", "for", "the", "opening", "of", "the", "talks", "has", "been", "disrupted", "by", "the", "Taiwan", "authorities", ".", "\"" ]
[ 39, 16, 24, 15, 16, 21, 24, 38, 12, 21, 38, 30, 35, 37, 15, 16, 24, 15, 22, 6, 22, 22, 21, 22, 22, 38, 22, 8, 0, 12, 16, 21, 15, 12, 21, 15, 12, 24, 42, 40, 40, 15, 12, 22, 24, 7, 0 ]
[ 21, 3, 11, 13, 11, 12, 12, 21, 11, 12, 21, 1, 21, 22, 13, 11, 12, 13, 11, 0, 11, 12, 12, 12, 12, 21, 11, 0, 0, 11, 12, 12, 13, 11, 12, 13, 11, 12, 21, 22, 22, 13, 11, 12, 12, 0, 0 ]
[ 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 3, 4, 0, 1, 2, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0 ]
32
[ "State", "media", "quoted", "China", "'s", "top", "negotiator", "with", "Taipei", ",", "Tang", "Shubei", ",", "as", "telling", "a", "visiting", "group", "from", "Taiwan", "on", "Wednesday", "that", "it", "was", "time", "for", "the", "rivals", "to", "hold", "political", "talks", "." ]
[ 21, 24, 38, 22, 27, 16, 21, 15, 22, 6, 22, 22, 6, 15, 39, 12, 16, 21, 15, 22, 15, 22, 15, 28, 38, 21, 15, 12, 24, 35, 37, 16, 24, 7 ]
[ 11, 12, 21, 11, 11, 12, 12, 13, 11, 0, 11, 12, 0, 13, 21, 11, 12, 12, 13, 11, 13, 11, 17, 11, 21, 11, 13, 11, 12, 21, 22, 11, 12, 0 ]
[ 0, 0, 0, 5, 0, 0, 0, 0, 5, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]
33
[ "\"", "Now", "is", "the", "time", "for", "the", "two", "sides", "to", "engage", "in", "political", "talks", "..." ]
[ 0, 30, 42, 12, 21, 15, 12, 11, 24, 35, 37, 15, 16, 24, 8 ]
[ 0, 3, 21, 11, 12, 13, 11, 12, 12, 21, 22, 13, 11, 12, 0 ]
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]
34
[ "that", "is", "to", "end", "the", "state", "of", "hostility", ",", "\"", "Thursday", "'s", "overseas", "edition", "of", "the", "People", "'s", "Daily", "quoted", "Tang", "as", "saying", "." ]
[ 43, 42, 35, 37, 12, 21, 15, 21, 6, 0, 22, 27, 16, 21, 15, 12, 23, 27, 16, 40, 22, 15, 39, 7 ]
[ 11, 21, 21, 22, 11, 12, 13, 11, 0, 0, 11, 11, 12, 12, 13, 11, 12, 11, 12, 21, 11, 13, 21, 0 ]
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 4, 4, 0, 1, 0, 0, 0 ]
35
[ "The", "foreign", "ministry", "'s", "Shen", "told", "Reuters", "Television", "in", "an", "interview", "he", "had", "read", "reports", "of", "Tang", "'s", "comments", "but", "gave", "no", "details", "of", "why", "the", "negotiator", "had", "considered", "the", "time", "right", "for", "talks", "with", "Taiwan", ",", "which", "Beijing", "considers", "a", "renegade", "province", "." ]
[ 12, 16, 21, 27, 22, 38, 22, 22, 15, 12, 21, 28, 38, 40, 24, 15, 22, 27, 24, 10, 38, 12, 24, 15, 46, 12, 21, 38, 40, 12, 21, 30, 15, 24, 15, 22, 6, 43, 22, 42, 12, 21, 21, 7 ]
[ 11, 12, 12, 11, 12, 21, 11, 12, 13, 11, 12, 11, 21, 22, 11, 13, 11, 11, 12, 0, 21, 11, 12, 13, 3, 11, 12, 21, 22, 11, 12, 3, 13, 11, 13, 11, 0, 11, 11, 21, 11, 12, 12, 0 ]
[ 0, 0, 0, 0, 3, 0, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 5, 0, 0, 0, 0, 0 ]
36
[ "China", ",", "which", "has", "long", "opposed", "all", "Taipei", "efforts", "to", "gain", "greater", "international", "recognition", ",", "was", "infuriated", "by", "a", "visit", "to", "Ukraine", "this", "week", "by", "Taiwanese", "Vice", "President", "Lien", "." ]
[ 22, 6, 43, 42, 30, 40, 12, 22, 24, 35, 37, 17, 16, 21, 6, 38, 40, 15, 12, 21, 35, 22, 12, 21, 15, 22, 22, 22, 22, 7 ]
[ 11, 0, 11, 21, 22, 22, 11, 12, 12, 21, 22, 11, 12, 12, 0, 21, 22, 13, 11, 12, 13, 11, 11, 12, 13, 11, 12, 12, 12, 0 ]
[ 5, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 7, 0, 0, 1, 0 ]
37
[ "China", "says", "time", "right", "for", "Taiwan", "talks", "." ]
[ 22, 42, 21, 30, 15, 22, 24, 7 ]
[ 11, 21, 11, 3, 13, 11, 12, 0 ]
[ 5, 0, 0, 0, 0, 5, 0, 0 ]
38
[ "BEIJING", "1996-08-22" ]
[ 39, 11 ]
[ 21, 11 ]
[ 5, 0 ]
39
[ "China", "has", "said", "it", "was", "time", "for", "political", "talks", "with", "Taiwan", "and", "that", "the", "rival", "island", "should", "take", "practical", "steps", "towards", "that", "goal", "." ]
[ 22, 42, 38, 28, 38, 21, 15, 16, 24, 15, 22, 10, 15, 12, 16, 21, 20, 37, 16, 24, 15, 12, 21, 7 ]
[ 11, 21, 22, 11, 21, 11, 13, 11, 12, 13, 11, 0, 17, 11, 12, 12, 21, 22, 11, 12, 13, 11, 12, 0 ]
[ 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]
40
[ "Consultations", "should", "be", "held", "to", "set", "the", "time", "and", "format", "of", "the", "talks", ",", "the", "official", "Xinhua", "news", "agency", "quoted", "Tang", "Shubei", ",", "executive", "vice", "chairman", "of", "the", "Association", "for", "Relations", "Across", "the", "Taiwan", "Straits", ",", "as", "saying", "late", "on", "Wednesday", "." ]
[ 24, 20, 37, 40, 35, 37, 12, 21, 10, 21, 15, 12, 24, 6, 12, 21, 22, 21, 21, 40, 22, 22, 6, 16, 21, 21, 15, 12, 22, 15, 22, 15, 12, 22, 22, 6, 15, 39, 30, 30, 22, 7 ]
[ 11, 21, 22, 22, 21, 22, 11, 12, 12, 12, 13, 11, 12, 0, 11, 12, 12, 12, 12, 21, 11, 12, 0, 11, 12, 12, 13, 11, 12, 13, 11, 13, 11, 12, 12, 0, 13, 21, 11, 3, 11, 0 ]
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 3, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0 ]
41
[ "German", "July", "car", "registrations", "up", "14.2", "pct", "yr", "/", "yr", "." ]
[ 22, 22, 21, 24, 30, 11, 16, 21, 34, 21, 7 ]
[ 11, 12, 12, 12, 3, 11, 12, 12, 0, 11, 0 ]
[ 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]
42
[ "FRANKFURT", "1996-08-22" ]
[ 22, 11 ]
[ 11, 12 ]
[ 5, 0 ]
43
[ "German", "first-time", "registrations", "of", "motor", "vehicles", "jumped", "14.2", "percent", "in", "July", "this", "year", "from", "the", "year-earlier", "period", ",", "the", "Federal", "office", "for", "motor", "vehicles", "said", "on", "Thursday", "." ]
[ 16, 16, 24, 15, 21, 24, 38, 11, 21, 15, 22, 12, 21, 15, 12, 16, 21, 6, 12, 22, 21, 15, 21, 24, 38, 15, 22, 7 ]
[ 11, 12, 12, 13, 11, 12, 21, 11, 12, 13, 11, 11, 12, 13, 11, 12, 12, 0, 11, 12, 12, 13, 11, 12, 21, 13, 11, 0 ]
[ 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 4, 4, 4, 4, 0, 0, 0, 0 ]
44
[ "The", "office", "said", "356,725", "new", "cars", "were", "registered", "in", "July", "1996", "--", "304,850", "passenger", "cars", "and", "15,613", "trucks", "." ]
[ 12, 21, 38, 11, 16, 24, 38, 40, 15, 22, 11, 8, 11, 21, 24, 10, 11, 24, 7 ]
[ 11, 12, 21, 11, 12, 12, 21, 22, 13, 11, 12, 0, 11, 12, 12, 0, 11, 12, 0 ]
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]
45
[ "The", "figures", "represent", "a", "13.6", "percent", "increase", "for", "passenger", "cars", "and", "a", "2.2", "percent", "decline", "for", "trucks", "from", "July", "1995", "." ]
[ 12, 24, 41, 12, 11, 21, 21, 15, 21, 24, 10, 12, 11, 21, 21, 15, 24, 15, 22, 11, 7 ]
[ 11, 12, 21, 11, 12, 12, 12, 13, 11, 12, 0, 11, 12, 12, 12, 13, 11, 13, 11, 12, 0 ]
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]
46
[ "Motor-bike", "registration", "rose", "32.7", "percent", "in", "the", "period", "." ]
[ 16, 21, 38, 11, 21, 15, 12, 21, 7 ]
[ 11, 12, 21, 11, 12, 13, 11, 12, 0 ]
[ 0, 0, 0, 0, 0, 0, 0, 0, 0 ]
47
[ "The", "growth", "was", "partly", "due", "to", "an", "increased", "number", "of", "Germans", "buying", "German", "cars", "abroad", ",", "while", "manufacturers", "said", "that", "domestic", "demand", "was", "weak", ",", "the", "federal", "office", "said", "." ]
[ 12, 21, 38, 30, 16, 35, 12, 40, 21, 15, 23, 39, 16, 24, 30, 6, 15, 24, 38, 12, 16, 21, 38, 16, 6, 12, 16, 21, 38, 7 ]
[ 11, 12, 21, 22, 22, 22, 11, 12, 12, 13, 11, 21, 11, 12, 3, 0, 17, 11, 21, 11, 12, 12, 21, 1, 0, 11, 12, 12, 21, 0 ]
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]
48
[ "Almost", "all", "German", "car", "manufacturers", "posted", "gains", "in", "registration", "numbers", "in", "the", "period", "." ]
[ 30, 12, 16, 21, 24, 38, 24, 15, 21, 24, 15, 12, 21, 7 ]
[ 11, 12, 12, 12, 12, 21, 11, 13, 11, 12, 13, 11, 12, 0 ]
[ 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]
49
[ "Volkswagen", "AG", "won", "77,719", "registrations", ",", "slightly", "more", "than", "a", "quarter", "of", "the", "total", "." ]
[ 22, 22, 38, 11, 24, 6, 30, 31, 15, 12, 21, 15, 12, 21, 7 ]
[ 11, 12, 21, 11, 12, 0, 11, 12, 12, 12, 12, 13, 11, 12, 0 ]
[ 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]
50
[ "Opel", "AG", "together", "with", "General", "Motors", "came", "in", "second", "place", "with", "49,269", "registrations", ",", "16.4", "percent", "of", "the", "overall", "figure", "." ]
[ 22, 22, 30, 15, 22, 23, 38, 15, 16, 21, 15, 11, 24, 6, 11, 21, 15, 12, 16, 21, 7 ]
[ 11, 12, 3, 13, 11, 12, 21, 13, 11, 12, 13, 11, 12, 0, 11, 12, 13, 11, 12, 12, 0 ]
[ 3, 4, 0, 0, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]
51
[ "Third", "was", "Ford", "with", "35,563", "registrations", ",", "or", "11.7", "percent", "." ]
[ 16, 38, 22, 15, 11, 24, 6, 10, 11, 21, 7 ]
[ 11, 21, 11, 13, 11, 12, 0, 0, 11, 12, 0 ]
[ 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0 ]
52
[ "Only", "Seat", "and", "Porsche", "had", "fewer", "registrations", "in", "July", "1996", "compared", "to", "last", "year", "'s", "July", "." ]
[ 30, 22, 10, 22, 38, 17, 24, 15, 22, 11, 40, 35, 16, 21, 27, 22, 7 ]
[ 11, 12, 12, 12, 21, 11, 12, 13, 11, 12, 13, 13, 11, 12, 11, 12, 0 ]
[ 0, 3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]
53
[ "Seat", "posted", "3,420", "registrations", "compared", "with", "5522", "registrations", "in", "July", "a", "year", "earlier", "." ]
[ 21, 38, 11, 24, 40, 15, 11, 24, 15, 22, 12, 21, 31, 7 ]
[ 11, 21, 11, 12, 13, 13, 11, 12, 13, 11, 11, 12, 3, 0 ]
[ 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]
54
[ "Porsche", "'s", "registrations", "fell", "to", "554", "from", "643", "." ]
[ 22, 27, 24, 38, 35, 11, 15, 11, 7 ]
[ 11, 11, 12, 21, 13, 11, 13, 11, 0 ]
[ 3, 0, 0, 0, 0, 0, 0, 0, 0 ]
55
[ "GREEK", "SOCIALISTS", "GIVE", "GREEN", "LIGHT", "TO", "PM", "FOR", "ELECTIONS", "." ]
[ 16, 24, 41, 22, 22, 35, 22, 15, 24, 7 ]
[ 11, 12, 21, 11, 12, 13, 11, 13, 11, 0 ]
[ 7, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]
56
[ "ATHENS", "1996-08-22" ]
[ 24, 11 ]
[ 11, 12 ]
[ 5, 0 ]
57
[ "The", "Greek", "socialist", "party", "'s", "executive", "bureau", "gave", "the", "green", "light", "to", "Prime", "Minister", "Costas", "Simitis", "to", "call", "snap", "elections", ",", "its", "general", "secretary", "Costas", "Skandalidis", "told", "reporters", "." ]
[ 12, 22, 16, 21, 27, 16, 21, 38, 12, 16, 21, 35, 22, 22, 42, 23, 35, 37, 37, 24, 6, 29, 16, 21, 22, 22, 38, 24, 7 ]
[ 11, 12, 12, 12, 11, 12, 12, 21, 11, 12, 12, 13, 11, 12, 21, 11, 21, 22, 22, 11, 0, 11, 12, 12, 11, 12, 21, 11, 0 ]
[ 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0 ]
58
[ "Prime", "Minister", "Costas", "Simitis", "is", "going", "to", "make", "an", "official", "announcement", "after", "a", "cabinet", "meeting", "later", "on", "Thursday", ",", "said", "Skandalidis", "." ]
[ 22, 22, 42, 22, 42, 39, 35, 37, 12, 16, 21, 15, 12, 21, 21, 30, 15, 22, 6, 38, 22, 7 ]
[ 11, 12, 21, 11, 21, 22, 22, 22, 11, 12, 12, 13, 11, 12, 12, 3, 13, 11, 0, 21, 11, 0 ]
[ 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0 ]
59
[ "--", "Dimitris", "Kontogiannis", ",", "Athens", "Newsroom", "+301", "3311812-4" ]
[ 8, 22, 22, 6, 22, 22, 22, 11 ]
[ 0, 11, 12, 0, 11, 12, 12, 12 ]
[ 0, 1, 2, 0, 3, 4, 0, 0 ]
60
[ "BayerVB", "sets", "C$", "100", "million", "six-year", "bond", "." ]
[ 21, 42, 3, 11, 11, 16, 21, 7 ]
[ 11, 21, 11, 12, 12, 12, 12, 0 ]
[ 3, 0, 7, 0, 0, 0, 0, 0 ]
61
[ "LONDON", "1996-08-22" ]
[ 22, 11 ]
[ 11, 12 ]
[ 5, 0 ]
62
[ "The", "following", "bond", "was", "announced", "by", "lead", "manager", "Toronto", "Dominion", "." ]
[ 12, 39, 21, 38, 40, 15, 21, 21, 22, 22, 7 ]
[ 11, 12, 12, 21, 22, 13, 11, 12, 12, 12, 0 ]
[ 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0 ]
63
[ "BORROWER", "BAYERISCHE", "VEREINSBANK" ]
[ 22, 22, 22 ]
[ 11, 12, 12 ]
[ 0, 3, 4 ]
64
[ "AMT", "C$", "100", "MLN", "COUPON", "6.625", "MATURITY", "24.SEP.02" ]
[ 22, 3, 11, 22, 21, 11, 22, 11 ]
[ 0, 11, 12, 12, 12, 12, 12, 11 ]
[ 0, 7, 0, 0, 0, 0, 0, 0 ]
65
[ "TYPE", "STRAIGHT", "ISS", "PRICE", "100.92", "PAY", "DATE", "24.SEP.96" ]
[ 22, 22, 22, 22, 11, 21, 22, 11 ]
[ 11, 12, 12, 12, 12, 12, 12, 12 ]
[ 0, 0, 0, 0, 0, 0, 0, 0 ]
66
[ "FULL", "FEES", "1.875", "REOFFER", "99.32", "SPREAD", "+20", "BP" ]
[ 22, 24, 11, 22, 11, 22, 11, 22 ]
[ 11, 12, 11, 12, 12, 12, 12, 12 ]
[ 0, 0, 0, 0, 0, 0, 0, 0 ]
67
[ "MOODY", "AA1", "LISTING", "LUX", "PAY", "FREQ", "=" ]
[ 22, 22, 22, 22, 21, 24, 34 ]
[ 11, 11, 12, 12, 12, 12, 21 ]
[ 0, 0, 0, 0, 0, 0, 0 ]
68
[ "S&P", "=", "DENOMS", "(", "K", ")", "1-10-100", "SALE", "LIMITS", "US", "/", "UK", "/", "CA" ]
[ 22, 34, 22, 4, 22, 5, 11, 22, 42, 22, 34, 22, 34, 40 ]
[ 11, 21, 11, 0, 11, 0, 11, 12, 21, 11, 0, 11, 0, 1 ]
[ 3, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 5, 0, 5 ]
69
[ "NEG", "PLG", "NO", "CRS", "DEFLT", "NO", "FORCE", "MAJ", "=" ]
[ 22, 22, 12, 22, 22, 12, 41, 20, 34 ]
[ 11, 12, 12, 12, 12, 12, 21, 22, 22 ]
[ 0, 0, 0, 0, 0, 0, 0, 0, 0 ]
70
[ "GOV", "LAW", "GERMAN", "HOME", "CTRY", "=", "TAX", "PROVS", "STANDARD" ]
[ 22, 22, 22, 22, 22, 34, 21, 24, 38 ]
[ 11, 12, 12, 12, 12, 21, 11, 12, 21 ]
[ 0, 0, 7, 0, 0, 0, 0, 0, 0 ]
71
[ "MGT", "/", "UND", "0.275", "SELL", "CONC", "1.60", "PRAECIP", "=" ]
[ 22, 34, 22, 11, 37, 22, 11, 22, 34 ]
[ 11, 0, 11, 12, 21, 11, 12, 12, 21 ]
[ 0, 0, 0, 0, 0, 0, 0, 0, 0 ]
72
[ "UNDERLYING", "GOVT", "BOND", "7.0", "PCT", "SEPT", "2001" ]
[ 39, 16, 21, 11, 22, 22, 11 ]
[ 21, 11, 12, 12, 12, 12, 12 ]
[ 0, 0, 0, 0, 0, 0, 0 ]
73
[ "NOTES", "BAYERISCHE", "VEREINSBANK", "IS", "JOINT", "LEAD", "MANAGER" ]
[ 24, 41, 22, 42, 42, 22, 21 ]
[ 11, 21, 11, 21, 22, 11, 12 ]
[ 0, 3, 4, 0, 0, 0, 0 ]
74
[ "--", "London", "Newsroom", "+44", "171", "542", "7658" ]
[ 8, 22, 22, 11, 11, 11, 11 ]
[ 11, 12, 12, 12, 12, 12, 12 ]
[ 0, 3, 4, 0, 0, 0, 0 ]
75
[ "Venantius", "sets", "$", "300", "million", "January", "1999", "FRN", "." ]
[ 16, 24, 3, 11, 11, 22, 11, 22, 7 ]
[ 11, 12, 11, 12, 12, 12, 12, 12, 0 ]
[ 3, 0, 0, 0, 0, 0, 0, 0, 0 ]
76
[ "LONDON", "1996-08-22" ]
[ 22, 11 ]
[ 11, 12 ]
[ 5, 0 ]
77
[ "The", "following", "floating-rate", "issue", "was", "announced", "by", "lead", "manager", "Lehman", "Brothers", "International", "." ]
[ 12, 39, 21, 21, 38, 40, 15, 21, 21, 22, 23, 22, 7 ]
[ 11, 12, 12, 12, 21, 22, 13, 11, 12, 12, 12, 12, 0 ]
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 4, 4, 0 ]
78
[ "BORROWER", "VENANTIUS", "AB", "(", "SWEDISH", "NATIONAL", "MORTGAGE", "AGENCY", ")" ]
[ 22, 22, 22, 4, 22, 22, 22, 21, 5 ]
[ 11, 12, 12, 0, 11, 12, 12, 12, 0 ]
[ 0, 3, 4, 0, 7, 0, 0, 0, 0 ]
79
[ "AMT", "$", "300", "MLN", "SPREAD", "-", "12.5", "BP", "MATURITY", "21.JAN.99" ]
[ 22, 3, 11, 22, 22, 8, 11, 22, 22, 11 ]
[ 0, 11, 12, 12, 12, 0, 11, 12, 12, 12 ]
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]
80
[ "TYPE", "FRN", "BASE", "3M", "LIBOR", "PAY", "DATE", "S23.SEP.96" ]
[ 22, 22, 22, 22, 22, 21, 22, 22 ]
[ 11, 12, 12, 12, 12, 12, 12, 12 ]
[ 0, 0, 0, 3, 0, 0, 0, 0 ]
81
[ "LAST", "MOODY", "AA3", "ISS", "PRICE", "99.956", "FULL", "FEES", "10", "BP" ]
[ 24, 22, 22, 22, 22, 11, 22, 24, 11, 22 ]
[ 11, 12, 11, 12, 12, 12, 12, 12, 11, 12 ]
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]
82
[ "LAST", "S&P", "AA+", "REOFFER", "=" ]
[ 22, 22, 22, 21, 34 ]
[ 11, 12, 12, 12, 21 ]
[ 0, 3, 0, 0, 0 ]
83
[ "NOTES", "S", "SHORT", "FIRST", "COUPON" ]
[ 24, 22, 22, 22, 22 ]
[ 11, 12, 12, 12, 12 ]
[ 0, 0, 0, 0, 0 ]
84
[ "LISTING", "LONDON", "DENOMS", "(", "K", ")", "1-10-100", "SALE", "LIMITS", "US", "/", "UK", "/", "JP", "/", "FR" ]
[ 39, 22, 22, 4, 22, 5, 11, 22, 42, 22, 34, 22, 34, 22, 34, 40 ]
[ 21, 11, 12, 0, 11, 0, 11, 12, 21, 11, 0, 11, 0, 11, 0, 1 ]
[ 0, 5, 0, 0, 0, 0, 0, 0, 0, 5, 0, 5, 0, 5, 0, 5 ]
85
[ "NEG", "PLG", "YES", "CRS", "DEFLT", "NO", "FORCE", "MAJ", "IPMA", "2" ]
[ 22, 22, 22, 22, 22, 12, 41, 20, 22, 11 ]
[ 11, 12, 12, 12, 12, 12, 21, 22, 11, 12 ]
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]
86
[ "GOV", "LAW", "ENGLISH", "HOME", "CTRY", "SWEDEN", "TAX", "PROVS", "STANDARD" ]
[ 22, 22, 22, 22, 22, 22, 21, 24, 38 ]
[ 11, 12, 12, 12, 12, 12, 12, 12, 21 ]
[ 0, 0, 7, 0, 0, 5, 0, 0, 0 ]
87
[ "MGT", "/", "UND", "5", "BP", "SELL", "CONC", "5", "BP", "PRAECIP", "=" ]
[ 22, 34, 22, 11, 22, 37, 22, 11, 22, 22, 34 ]
[ 11, 0, 11, 12, 12, 21, 11, 12, 12, 12, 21 ]
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]
88
[ "NOTES", "ISSUED", "OFF", "EMTN", "PROGRAMME" ]
[ 24, 38, 21, 22, 22 ]
[ 11, 21, 11, 12, 12 ]
[ 0, 0, 0, 0, 0 ]
89
[ "--", "London", "Newsroom", "+44", "171", "542", "8863" ]
[ 8, 22, 22, 11, 11, 11, 11 ]
[ 11, 12, 12, 12, 12, 12, 12 ]
[ 0, 3, 4, 0, 0, 0, 0 ]
90
[ "Port", "conditions", "update", "-", "Syria", "-", "Lloyds", "Shipping", "." ]
[ 21, 24, 41, 8, 22, 8, 22, 22, 7 ]
[ 11, 12, 21, 0, 11, 0, 11, 12, 0 ]
[ 0, 0, 0, 0, 5, 0, 3, 4, 0 ]
91
[ "Port", "conditions", "from", "Lloyds", "Shipping", "Intelligence", "Service", "--" ]
[ 21, 24, 15, 22, 22, 22, 22, 8 ]
[ 11, 12, 13, 11, 12, 12, 12, 0 ]
[ 0, 0, 0, 3, 4, 4, 4, 0 ]
92
[ "LATTAKIA", ",", "Aug", "10", "-", "waiting", "time", "at", "Lattakia", "and", "Tartous", "presently", "24", "hours", "." ]
[ 22, 6, 22, 11, 8, 39, 21, 15, 22, 10, 22, 30, 11, 24, 7 ]
[ 11, 0, 11, 12, 12, 21, 11, 13, 11, 12, 12, 3, 11, 12, 0 ]
[ 5, 0, 0, 0, 0, 0, 0, 0, 5, 0, 5, 0, 0, 0, 0 ]
93
[ "Israel", "plays", "down", "fears", "of", "war", "with", "Syria", "." ]
[ 22, 42, 33, 24, 15, 21, 15, 22, 7 ]
[ 11, 21, 15, 11, 13, 11, 13, 11, 0 ]
[ 5, 0, 0, 0, 0, 0, 0, 5, 0 ]
94
[ "Colleen", "Siegel" ]
[ 22, 22 ]
[ 11, 12 ]
[ 1, 2 ]
95
[ "JERUSALEM", "1996-08-22" ]
[ 40, 11 ]
[ 11, 12 ]
[ 5, 0 ]
96
[ "Israel", "'s", "outgoing", "peace", "negotiator", "with", "Syria", "said", "on", "Thursday", "current", "tensions", "between", "the", "two", "countries", "appeared", "to", "be", "a", "storm", "in", "a", "teacup", "." ]
[ 22, 27, 16, 21, 21, 15, 22, 38, 15, 22, 16, 24, 15, 12, 11, 24, 38, 35, 37, 12, 21, 15, 12, 21, 7 ]
[ 11, 11, 12, 12, 12, 13, 11, 21, 13, 11, 12, 12, 13, 11, 12, 12, 21, 22, 22, 11, 12, 13, 11, 12, 0 ]
[ 5, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]
97
[ "Itamar", "Rabinovich", ",", "who", "as", "Israel", "'s", "ambassador", "to", "Washington", "conducted", "unfruitful", "negotiations", "with", "Syria", ",", "told", "Israel", "Radio", "it", "looked", "like", "Damascus", "wanted", "to", "talk", "rather", "than", "fight", "." ]
[ 22, 22, 6, 44, 15, 22, 27, 21, 35, 22, 38, 16, 24, 15, 22, 6, 38, 22, 22, 28, 38, 15, 22, 38, 35, 37, 30, 15, 21, 7 ]
[ 11, 12, 0, 11, 13, 11, 11, 12, 13, 11, 21, 11, 12, 13, 11, 0, 21, 11, 12, 11, 21, 13, 11, 21, 22, 22, 5, 6, 11, 0 ]
[ 1, 2, 0, 0, 0, 5, 0, 0, 0, 5, 0, 0, 0, 0, 5, 0, 0, 3, 4, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0 ]
98
[ "\"", "It", "appears", "to", "me", "the", "Syrian", "priority", "is", "still", "to", "negotiate", "." ]
[ 0, 28, 42, 35, 28, 12, 16, 21, 42, 30, 35, 37, 7 ]
[ 0, 11, 21, 13, 11, 11, 12, 12, 21, 3, 21, 22, 0 ]
[ 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0 ]
99
[ "The", "Syrians", "are", "confused", ",", "they", "are", "definitely", "tense", ",", "but", "the", "general", "assessment", "here", "in", "Washington", "is", "that", "this", "is", "essentially", "a", "storm", "in", "a", "teacup", ",", "\"", "he", "said", "." ]
[ 12, 23, 41, 40, 6, 28, 41, 30, 16, 6, 10, 12, 16, 21, 30, 15, 22, 42, 15, 12, 42, 30, 12, 21, 15, 12, 21, 6, 0, 28, 38, 7 ]
[ 11, 12, 21, 1, 0, 11, 21, 22, 11, 0, 0, 11, 12, 12, 3, 13, 11, 21, 17, 11, 21, 3, 11, 12, 13, 11, 12, 0, 0, 11, 21, 0 ]
[ 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]

Dataset Card for "conll2003"

Dataset Summary

The shared task of CoNLL-2003 concerns language-independent named entity recognition. We will concentrate on four types of named entities: persons, locations, organizations and names of miscellaneous entities that do not belong to the previous three groups.

The CoNLL-2003 shared task data files contain four columns separated by a single space. Each word has been put on a separate line and there is an empty line after each sentence. The first item on each line is a word, the second a part-of-speech (POS) tag, the third a syntactic chunk tag and the fourth the named entity tag. In the original data files, the chunk tags and the named entity tags have the format I-TYPE, which means that the word is inside a phrase of type TYPE. Only when two phrases of the same type immediately follow each other does the first word of the second phrase get the tag B-TYPE, to show that it starts a new phrase. A word with tag O is not part of a phrase. Note that this dataset uses the IOB2 tagging scheme, in which the first word of every phrase is tagged B-TYPE, whereas the original data files use the IOB1 scheme described above.

For more details see https://www.clips.uantwerpen.be/conll2003/ner/ and https://www.aclweb.org/anthology/W03-0419

Supported Tasks and Leaderboards

More Information Needed

Languages

More Information Needed

Dataset Structure

Data Instances

conll2003

  • Size of downloaded dataset files: 4.85 MB
  • Size of the generated dataset: 10.26 MB
  • Total amount of disk used: 15.11 MB

An example of 'train' looks as follows.

{
    "chunk_tags": [11, 12, 12, 21, 13, 11, 11, 21, 13, 11, 12, 13, 11, 21, 22, 11, 12, 17, 11, 21, 17, 11, 12, 12, 21, 22, 22, 13, 11, 0],
    "id": "3",
    "ner_tags": [0, 3, 4, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    "pos_tags": [12, 22, 22, 38, 15, 22, 28, 38, 15, 16, 21, 35, 24, 35, 37, 16, 21, 15, 24, 41, 15, 16, 21, 21, 20, 37, 40, 35, 21, 7],
    "tokens": ["The", "European", "Commission", "said", "on", "Thursday", "it", "disagreed", "with", "German", "advice", "to", "consumers", "to", "shun", "British", "lamb", "until", "scientists", "determine", "whether", "mad", "cow", "disease", "can", "be", "transmitted", "to", "sheep", "."]
}

The original data files contain -DOCSTART- lines, special lines that act as a boundary between two different documents. These lines are filtered out in this implementation, so they do not appear in the dataset.

Data Fields

The data fields are the same among all splits.

conll2003

  • id: a string feature.
  • tokens: a list of string features.
  • pos_tags: a list of classification labels (int). Full tagset with indices:
{'"': 0, "''": 1, '#': 2, '$': 3, '(': 4, ')': 5, ',': 6, '.': 7, ':': 8, '``': 9, 'CC': 10, 'CD': 11, 'DT': 12,
 'EX': 13, 'FW': 14, 'IN': 15, 'JJ': 16, 'JJR': 17, 'JJS': 18, 'LS': 19, 'MD': 20, 'NN': 21, 'NNP': 22, 'NNPS': 23,
 'NNS': 24, 'NN|SYM': 25, 'PDT': 26, 'POS': 27, 'PRP': 28, 'PRP$': 29, 'RB': 30, 'RBR': 31, 'RBS': 32, 'RP': 33,
 'SYM': 34, 'TO': 35, 'UH': 36, 'VB': 37, 'VBD': 38, 'VBG': 39, 'VBN': 40, 'VBP': 41, 'VBZ': 42, 'WDT': 43,
 'WP': 44, 'WP$': 45, 'WRB': 46}
  • chunk_tags: a list of classification labels (int). Full tagset with indices:
{'O': 0, 'B-ADJP': 1, 'I-ADJP': 2, 'B-ADVP': 3, 'I-ADVP': 4, 'B-CONJP': 5, 'I-CONJP': 6, 'B-INTJ': 7, 'I-INTJ': 8,
 'B-LST': 9, 'I-LST': 10, 'B-NP': 11, 'I-NP': 12, 'B-PP': 13, 'I-PP': 14, 'B-PRT': 15, 'I-PRT': 16, 'B-SBAR': 17,
 'I-SBAR': 18, 'B-UCP': 19, 'I-UCP': 20, 'B-VP': 21, 'I-VP': 22}
  • ner_tags: a list of classification labels (int). Full tagset with indices:
{'O': 0, 'B-PER': 1, 'I-PER': 2, 'B-ORG': 3, 'I-ORG': 4, 'B-LOC': 5, 'I-LOC': 6, 'B-MISC': 7, 'I-MISC': 8}

Data Splits

| name      | train | validation | test |
|-----------|------:|-----------:|-----:|
| conll2003 | 14041 |       3250 | 3453 |

Dataset Creation

Curation Rationale

More Information Needed

Source Data

Initial Data Collection and Normalization

More Information Needed

Who are the source language producers?

More Information Needed

Annotations

Annotation process

More Information Needed

Who are the annotators?

More Information Needed

Personal and Sensitive Information

More Information Needed

Considerations for Using the Data

Social Impact of Dataset

More Information Needed

Discussion of Biases

More Information Needed

Other Known Limitations

More Information Needed

Additional Information

Dataset Curators

More Information Needed

Licensing Information

From the CoNLL2003 shared task page:

The English data is a collection of news wire articles from the Reuters Corpus. The annotation has been done by people of the University of Antwerp. Because of copyright reasons we only make available the annotations. In order to build the complete data sets you will need access to the Reuters Corpus. It can be obtained for research purposes without any charge from NIST.

The copyrights are defined below, from the Reuters Corpus page:

The stories in the Reuters Corpus are under the copyright of Reuters Ltd and/or Thomson Reuters, and their use is governed by the following agreements:

Organizational agreement

This agreement must be signed by the person responsible for the data at your organization, and sent to NIST.

Individual agreement

This agreement must be signed by all researchers using the Reuters Corpus at your organization, and kept on file at your organization.

Citation Information

@inproceedings{tjong-kim-sang-de-meulder-2003-introduction,
    title = "Introduction to the {C}o{NLL}-2003 Shared Task: Language-Independent Named Entity Recognition",
    author = "Tjong Kim Sang, Erik F.  and
      De Meulder, Fien",
    booktitle = "Proceedings of the Seventh Conference on Natural Language Learning at {HLT}-{NAACL} 2003",
    year = "2003",
    url = "https://www.aclweb.org/anthology/W03-0419",
    pages = "142--147",
}

Contributions

Thanks to @jplu, @vblagoje, @lhoestq for adding this dataset.

Downloads last month
33,934

Models trained or fine-tuned on conll2003

Space using conll2003 1