-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfind_unsure.py
More file actions
53 lines (44 loc) · 1.53 KB
/
find_unsure.py
File metadata and controls
53 lines (44 loc) · 1.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# -*- coding: utf-8 -*-
import os
import sys
import xml.etree.ElementTree as ET
class Generate(object):
    """Report every tag of type "unsure" found in a folder of XML files.

    Each document is expected to keep its raw text in the first child of the
    root element and its annotation tags in the second child. That layout is
    taken from how the elements are indexed here; it is not validated.

    Attributes:
        f_loc: folder that is scanned for XML files.
        f: directory listing of ``f_loc`` taken at construction time.
        xmlfile: paths of the ``.xml`` entries (set by ``load_data``).
        tree, root: parse tree and root element of the document currently
            being processed (set by ``extr_data``).
        text: text of the current document (set by ``locate``).
    """

    def __init__(self, f_loc="C:\\liuclp\\June2014sms-with-pro\\"):
        """List the contents of the folder to scan.

        Args:
            f_loc: folder to scan; the default is the location this script
                was originally written for.

        Raises:
            OSError: if ``f_loc`` cannot be listed.
        """
        self.f_loc = f_loc
        self.f = os.listdir(self.f_loc)

    def load_data(self):
        """Collect the paths of all ``.xml`` entries into ``self.xmlfile``."""
        # os.path.join copes with a folder given with or without a trailing
        # separator; plain concatenation only worked for the former.
        self.xmlfile = [
            os.path.join(self.f_loc, name)
            for name in self.f
            if name.endswith('.xml')
        ]

    def extr_data(self):
        """Print one report line, then a blank line, per "unsure" tag.

        A report line holds the tag name, its type, its start and end
        offsets, the sentence containing the start offset and that
        sentence's index, separated by single spaces. When the start offset
        does not fall inside any sentence, the sentence is omitted and the
        index is shown as ``None``.

        Raises:
            KeyError: if an "unsure" tag has no ``start`` or ``end``.
        """
        for path in self.xmlfile:
            self.tree = ET.parse(path)
            self.root = self.tree.getroot()
            # The second child of the root holds all the tags.
            for tag in self.root[1]:
                # .get() lets tags without a "type" attribute be skipped
                # instead of aborting the whole run with a KeyError.
                if tag.get("type") != "unsure":
                    continue
                start = tag.attrib["start"]
                fields = [tag.tag, tag.attrib["type"], start,
                          tag.attrib["end"]]
                found = self._find_sentence(int(start))
                if found is None:
                    fields.append(str(None))
                else:
                    sentid, sentence = found
                    fields.append(sentence)
                    fields.append(str(sentid))
                # Single-argument print(...) means the same thing as a
                # Python 2 print statement and as the Python 3 function.
                print(" ".join(fields))
                print("")

    def locate(self, start):
        """Print the sentence containing offset *start* and return its index.

        Sentences are the newline-separated lines of the current document's
        text, numbered from 0. An offset that points at a newline character
        is attributed to the line that follows it.

        Args:
            start: character offset into the document text.

        Returns:
            The 0-based sentence index, or ``None`` (nothing is printed) when
            *start* is negative, past the end of the text, or on a trailing
            newline with no text after it.
        """
        found = self._find_sentence(start)
        if found is None:
            return None
        sentid, sentence = found
        print(sentence)
        return sentid

    def _find_sentence(self, start):
        """Return ``(index, sentence)`` for offset *start*, or ``None``."""
        # An empty text element yields None; treat it as an empty document.
        self.text = self.root[0].text or ""
        text = self.text
        if start < 0 or start >= len(text):
            return None
        # Newlines at or before `start` are the ones already passed, so their
        # count is the sentence index and the last one marks where it begins.
        sentid = text.count("\n", 0, start + 1)
        begin = text.rfind("\n", 0, start + 1) + 1
        if begin >= len(text):
            return None
        end = text.find("\n", begin)
        if end == -1:
            # The final line has no terminating newline; it runs to the end.
            end = len(text)
        return sentid, text[begin:end]
if __name__ == "__main__":
    # Run the report only when this file is executed as a script, so that
    # importing the module (for example to reuse Generate) does not list the
    # default folder and print a report as a side effect.
    e = Generate()
    e.load_data()
    e.extr_data()