1
2
3
"""Variables and functions that help with handling character categories."""
5
6
7 from unicodedata import category
8
9
# NOTE(review): '\\c' keeps the value the original '\c' produced. '\c' is not
# a recognised Python escape sequence, so the literal held a backslash followed
# by the letter 'c'; spelling it '\\c' removes the invalid-escape warning
# without changing the string. Confirm whether treating a backslash and 'c'
# as blank spaces is really intended.
# NOTE(review): the original literal was split over two lines by a raw
# line-break character placed after '\v', which left the string unterminated.
# That character is dropped here; if it was a Unicode separator (for example
# U+2028), re-add it as an explicit escape.
blankspaces = ' \f\n\\c\r\t\v'
"""@var: A string containing every blank-space character."""

separators = '`~!@#$%^&*()_-+=\\|]}[{\";:/?.>,<†„“।॥ו–´’‘‚י0123456789ः'
"""@var: A string containing every separator character."""
15
16
def first_word_char(string):
    """Return the index of the first word character in a string.

    Characters are tested one by one, from left to right, with
    L{is_word_char}; the scan stops at the first match.

    @param string: The string to scan.
    @type string: str
    @return:
        The index of the first word character in the string 'string' or -1 if
        the string contains no word character (this includes the empty
        string).
    @rtype: int
    """
    for index, char in enumerate(string):
        if is_word_char(char):
            return index
    return -1
29
30
# NOTE(review): the function header was missing from this part of the file;
# the name 'last_word_char' is inferred from its sibling first_word_char and
# the parameter name from the body -- confirm against the callers.
def last_word_char(string):
    """Return the index of the last word character in a string.

    The string is reversed and handed to L{first_word_char}; the position
    found in the reversed string is then mapped back to an index in the
    original string.

    @param string: The string to scan.
    @type string: str
    @return:
        The index of the last word character in the string 'string' or -1 if
        the string contains no word character.
    @rtype: int
    """
    reversed_index = first_word_char(string[::-1])
    if reversed_index == -1:
        return -1
    # Position k in the reversed string is position len - 1 - k in the
    # original one.
    return len(string) - reversed_index - 1
43
44
def is_word_char(char):
    """Check if a character is a word character.

    A word character is a character whose Unicode general category starts
    with "L" (a letter). Digits, marks, punctuation, symbols and spaces are
    therefore not word characters.

    @param char: The character to test; it must be a single character,
        because it is passed directly to C{unicodedata.category}.
    @type char: str
    @return:
        True if the character 'char' is a word character, False otherwise.
    @rtype: bool
    """
    # Only the major class (first letter of the category code) matters.
    return category(char)[0] == "L"
56