1
2
3
"""Some variables and functions to help handle character categories."""
5
6
7 from __future__ import absolute_import, unicode_literals
8 import unicodedata
9
10
# NOTE(review): the reviewed literal contained the invalid escape "\c", which
# Python keeps as the two characters "\" and "c" -- so the letter "c" was
# treated as blank space.  It also contained a raw line-breaking character
# that split the literal across two source lines.  "\c" is dropped here and
# the raw character is assumed to be U+0085 (NEXT LINE), written as an
# explicit escape -- confirm against the original file.
blankspaces = ' \f\n\r\t\v\x85'
"""@var: A string containing every blank-space character."""
13
# Built from adjacent literals so each group of characters is easy to audit;
# the resulting string is the same single sequence of characters.
separators = (
    "`~!@#$%^&*()_-+=\\|]}[{\";:/?.>,<"  # ASCII punctuation and symbols
    "†„“।॥ו–´’‘‚י"  # non-ASCII marks, quotes, dandas and Hebrew letters
    "0123456789"  # ASCII digits are treated as separators too
    "ः"  # trailing non-ASCII sign
)
"""@var: A string containing every separator character."""
16
17
def first_word_char(string):
    """Return the index of the first word character in a string.

    @param string:
        The string to scan, one character at a time.
    @return:
        The index of the first character of 'string' for which
        is_word_char() is true, or -1 if the string contains no word
        character (which includes the empty string).
    @rtype: int
    """
    for index, ch in enumerate(string):
        if is_word_char(ch):
            return index
    # Fell off the end of the loop: no character qualified.
    return -1
30
31
# NOTE(review): the `def` line was absent from the reviewed source; the name
# `last_word_char` is inferred from the sibling first_word_char() and the
# parameter name from the body -- confirm against callers.
def last_word_char(string):
    """Return the index of the last word character in a string.

    @param string:
        The string to scan.
    @return:
        The index of the last word character in the string 'string' or -1 if
        the string contains no word character.
    @rtype: int
    """
    # Scanning the reversed string turns "last word character" into
    # "first word character", so the forward search can be reused.
    result = first_word_char(string[::-1])
    if result == -1:
        return -1
    # Convert the offset in the reversed string back to an index in 'string'.
    return len(string) - result - 1
44
45
def is_word_char(char):
    """Check if a character is a word character.

    A word character is a letter: a character whose Unicode general category
    starts with "L" (Lu, Ll, Lt, Lm, Lo).  Digits, punctuation, symbols,
    marks and whitespace are therefore not word characters.

    @param char:
        The single character to classify.
    @return:
        True if the character 'char' is a letter, False otherwise.
    @rtype: bool
    @raise TypeError:
        Raised by unicodedata.category() when 'char' is not a single
        character.
    """
    return unicodedata.category(char).startswith("L")
57