forked from extern/Unexpected-Keyboard
Allow compose sequence ending with more symbols
Change the compose state machine definition to allow final states that are wider than 16 bits. This increases the number of sequences that can be used from en_US_UTF_8_Compose.pre from 2013 to 2043 (of 3201).
This commit is contained in:
parent
f7f1d85f80
commit
39b3f50aa3
@ -64,11 +64,6 @@ def parse_sequences_file_xkb(fname):
|
|||||||
def parse_seq_result(r):
|
def parse_seq_result(r):
|
||||||
if len(r) == 2 and r[0] == '\\':
|
if len(r) == 2 and r[0] == '\\':
|
||||||
return r[1]
|
return r[1]
|
||||||
# The state machine can't represent characters that do not fit in a
|
|
||||||
# 16-bit char. This breaks some sequences that output letters with
|
|
||||||
# combined diacritics or emojis.
|
|
||||||
if len(r) > 1 or ord(r[0]) > 65535:
|
|
||||||
raise Exception("Char out of range: " + r)
|
|
||||||
return r
|
return r
|
||||||
# Populate [char_names] with the information present in the file.
|
# Populate [char_names] with the information present in the file.
|
||||||
with open(fname, "r") as inp:
|
with open(fname, "r") as inp:
|
||||||
@ -146,7 +141,15 @@ def make_automata(tree_root):
|
|||||||
states[i] = (c, node_i)
|
states[i] = (c, node_i)
|
||||||
i += 1
|
i += 1
|
||||||
def add_leaf(c):
|
def add_leaf(c):
|
||||||
states.append((c, 1))
|
# There are two encodings for leaves: character final state for 15-bit
|
||||||
|
# characters and string final state for the rest.
|
||||||
|
if len(c) > 1 or ord(c[0]) > 32767: # String final state
|
||||||
|
cb = c.encode("UTF-16")
|
||||||
|
states.append((-1, len(cb) + 1))
|
||||||
|
for c in cb:
|
||||||
|
states.append((c, 0))
|
||||||
|
else: # Character final state
|
||||||
|
states.append((c, 1))
|
||||||
def add_node(n):
|
def add_node(n):
|
||||||
if type(n) == str:
|
if type(n) == str:
|
||||||
add_leaf(n)
|
add_leaf(n)
|
||||||
@ -169,6 +172,7 @@ def gen_java(machine):
|
|||||||
chars_map = {
|
chars_map = {
|
||||||
# These characters cannot be used in unicode form as Java's parser
|
# These characters cannot be used in unicode form as Java's parser
|
||||||
# unescape unicode sequences before parsing.
|
# unescape unicode sequences before parsing.
|
||||||
|
-1: "\\uFFFF",
|
||||||
"\"": "\\\"",
|
"\"": "\\\"",
|
||||||
"\\": "\\\\",
|
"\\": "\\\\",
|
||||||
"\n": "\\n",
|
"\n": "\\n",
|
||||||
|
@ -27,31 +27,48 @@ public final class ComposeKey
|
|||||||
}
|
}
|
||||||
|
|
||||||
/** Apply the pending compose sequence to char [c]. */
|
/** Apply the pending compose sequence to char [c]. */
|
||||||
static KeyValue apply(int state, char c)
|
static KeyValue apply(int prev, char c)
|
||||||
{
|
{
|
||||||
char[] states = ComposeKeyData.states;
|
char[] states = ComposeKeyData.states;
|
||||||
char[] edges = ComposeKeyData.edges;
|
char[] edges = ComposeKeyData.edges;
|
||||||
int length = edges[state];
|
int prev_length = edges[prev];
|
||||||
int next = Arrays.binarySearch(states, state + 1, state + length, c);
|
int next = Arrays.binarySearch(states, prev + 1, prev + prev_length, c);
|
||||||
if (next < 0)
|
if (next < 0)
|
||||||
return null;
|
return null;
|
||||||
next = edges[next];
|
next = edges[next];
|
||||||
// The next state is the end of a sequence, show the result.
|
char next_header = states[next];
|
||||||
if (edges[next] == 1)
|
if (next_header == 0) // Enter a new intermediate state.
|
||||||
return KeyValue.makeCharKey(states[next]);
|
return KeyValue.makeComposePending(String.valueOf(c), next, 0);
|
||||||
return KeyValue.makeComposePending(String.valueOf(c), next, 0);
|
else if (next_header > 0) // Character final state.
|
||||||
|
return KeyValue.makeCharKey(next_header);
|
||||||
|
else // next_header is < 0, string final state.
|
||||||
|
{
|
||||||
|
int next_length = edges[next];
|
||||||
|
return KeyValue.makeStringKey(
|
||||||
|
new String(states, next + 1, next + next_length));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** The [states] array represents the different states and their transition.
|
/** The state machine is comprised of two arrays.
|
||||||
A state occupies one or several cells of the array:
|
|
||||||
- The first cell is the result of the compose sequence if the state is of
|
The [states] array represents the different states and the associated
|
||||||
size 1, unspecified otherwise.
|
transitions:
|
||||||
- The remaining cells are the transitions, sorted alphabetically.
|
- The first cell is the header cell, [states[s]].
|
||||||
|
- If the header is equal to [0],
|
||||||
|
The remaining cells are the transition characters, sorted
|
||||||
|
alphabetically.
|
||||||
|
- If the header is positive,
|
||||||
|
This is a final state, [states[s]] is the result of the sequence.
|
||||||
|
In this case, [edges[s]] must be equal to [1].
|
||||||
|
- If the header is equal to [-1],
|
||||||
|
This is a final state, the remaining cells represent the result string
|
||||||
|
which starts at index [s + 1] and has a length of [edges[s] - 1].
|
||||||
|
|
||||||
The [edges] array represents the transition state corresponding to each
|
The [edges] array represents the transition state corresponding to each
|
||||||
accepted inputs.
|
accepted inputs.
|
||||||
Id [states[i]] is the first cell of a state, [edges[i]] is the number of
|
- If [states[s]] is a header cell, [edges[s]] is the number of cells
|
||||||
cells occupied by the state [i].
|
occupied by the state [s], including the header cell.
|
||||||
If [states[i]] is a transition, [edges[i]] is the index of the state to
|
- If [states[s]] is a transition, [edges[s]] is the index of the state to
|
||||||
jump into. */
|
jump into.
|
||||||
|
- If [states[s]] is a part of a final state, [edges[s]] is not used. */
|
||||||
}
|
}
|
||||||
|
Binary file not shown.
Loading…
Reference in New Issue
Block a user