Geonames 500 example trie, build status, trie graph
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
This commit is contained in:
parent
c6fad6471f
commit
d8d8436325
5 changed files with 93 additions and 0 deletions
|
@ -1,6 +1,10 @@
|
|||
[![Build Status](https://drone.friedl.net/api/badges/incubator/bytetrie/status.svg)](https://drone.friedl.net/incubator/bytetrie)
|
||||
|
||||
# Bytetrie
|
||||
A fast, dependency-free, self-compressing trie with radix 256 in pure Python.
|
||||
|
||||
![](trie.png)
|
||||
|
||||
Bytetrie allows fast prefix search in a large corpus of keys. Each key can be
|
||||
associated with arbitrary data. It features fast lookup times at the cost of
|
||||
expensive insertion. A Bytetrie is best used if it can be pre-filled with data.
|
||||
|
|
BIN
examples/cities500.tar.gz
Normal file
BIN
examples/cities500.tar.gz
Normal file
Binary file not shown.
7
examples/gen.sh
Executable file
7
examples/gen.sh
Executable file
|
@ -0,0 +1,7 @@
|
|||
#!/bin/sh

# Regenerate the geonames example graph: unpack the data set, let geonames.py
# write geo_dot.dot and render that file to geo_dot_twopi.png with twopi.

# Abort on the first failing step; every later step consumes the output of
# the previous one and would otherwise run on missing or stale files.
set -e

# Work relative to this script's directory so it can be started from anywhere.
cd -- "$(dirname -- "$0")"

# NOTE(review): xwim is assumed to unpack the archive into ./cities500 - confirm.
xwim cities500.tar.gz
mv cities500/cities500.txt .
# Removing the (expected to be empty) directory is cleanup only; a failure
# here must not prevent the graph from being generated.
rmdir cities500 || true
python geonames.py
twopi -Tpng geo_dot.dot -o geo_dot_twopi.png -Groot=root -x
|
82
examples/geonames.py
Normal file
82
examples/geonames.py
Normal file
|
@ -0,0 +1,82 @@
|
|||
import csv
|
||||
from bytetrie import ByteTrie
|
||||
|
||||
def load_geonames():
    """Build a multi-value ``ByteTrie`` from the ``cities500.txt`` dump.

    Reads the tab-separated file ``cities500.txt`` from the current working
    directory. For every row the UTF-8 encoded field at index 1 is inserted
    as key, with the field at index 17 as its value.

    Returns:
        The populated ``ByteTrie``.

    Raises:
        Exception: Whatever went wrong while processing a row is re-raised
            after the row number and label have been printed for diagnosis.
    """
    t = ByteTrie(multi_value=True)
    # Decode explicitly as UTF-8 instead of relying on the locale's default
    # encoding, and pass newline="" as the csv module requires for files it
    # reads itself.
    with open("cities500.txt", "r", encoding="utf-8", newline="") as f:
        # The file is plain tab-delimited text: a double quote inside a name
        # is data, not CSV quoting, so quote handling is switched off.
        reader = csv.reader(f, delimiter='\t', quoting=csv.QUOTE_NONE)
        for i, row in enumerate(reader):
            try:
                t.insert(row[1].encode("utf-8"), row[17])
            except Exception:
                # Do not index the row again unguarded: a short row would
                # raise a second IndexError here and bury the diagnostics.
                label = row[1] if len(row) > 1 else None
                print(f"Error in row {i}")
                print(f"Label: '{label}'")
                print(f"Type: {type(label)}")
                raise
    return t
|
||||
|
||||
def insert(trie):
    """Return a helper that inserts strings into ``trie``.

    The returned callable accepts any number of strings. Each string is
    stored under its own UTF-8 encoding as key, with the string itself as
    value. It must only be used with ``str`` arguments.
    """

    def _insert(*vals):
        for text in vals:
            trie.insert(text.encode('utf-8'), text)

    return _insert
|
||||
|
||||
def load_simple_trie():
    """Return a small ``ByteTrie`` filled with a fixed set of sample keys.

    Every key is inserted with itself as value, in the order listed below.
    """
    trie = ByteTrie()
    add = insert(trie)
    batches = (
        ("A",),
        ("AA", "AB"),
        ("ABCDE",),
        ("AACDEF", "AACDEGG", "AACDEH"),
    )
    for batch in batches:
        add(*batch)
    return trie
|
||||
|
||||
# This uses internal representations which are not supposed to
|
||||
# be used as public API and are subject to change!
|
||||
from bytetrie.bytetrie import ByteTrie, Node, Root, Child, Terminal
|
||||
def geonames_to_dot(t: ByteTrie):
    """Render the trie ``t`` as Graphviz DOT source text.

    Emits a ``strict digraph`` with a fixed preamble followed by the edges
    and nodes produced by ``_geonames_node_to_dot`` for every child of
    ``t.root``. Each first-level subtree gets its own hue; the hues are
    spaced evenly, starting at 0 in steps of ``1 / number of root children``.

    Args:
        t: The trie to render. Only ``t.root`` and its ``children`` are read
            here.

    Returns:
        The complete DOT document as a string. For a trie whose root has no
        children this is just the preamble and the closing brace.
    """
    # Whitespace inside the preamble is insignificant in DOT.
    header = """strict digraph {
    graph [
        bgcolor="transparent"
    ];

    edge [
        arrowhead="none",
        penwidth="0.05",
    ];

    node [
        label="",
        sep="2"
    ];

    root [shape="circle", width="0.4"]
"""

    children = t.root.children
    child_count = len(children)
    # Guard against an empty trie, which would otherwise divide by zero.
    hue_inc = 1 / child_count if child_count else 0
    hue = 0

    # Collect the pieces and join once instead of repeatedly extending one
    # ever-growing string.
    parts = [header]
    for child in children:
        parts.append(_geonames_node_to_dot(t.root, child, 1, hue))
        hue += hue_inc

    parts.append("}")
    return "".join(parts)
|
||||
|
||||
def _geonames_node_to_dot(p: Node, n: Child, depth, hue):
    """Return the DOT lines for node ``n`` and its whole subtree.

    The result starts with the edge from parent ``p`` to ``n``, followed by
    the node statement for ``n`` and then, depth-first in iteration order,
    the output for every child of ``n``.

    Args:
        p: Parent node; only its ``dot_id()`` is used.
        n: Node to render; its ``dot_id()`` and ``children`` are used.
        depth: Level of ``n``; ``depth * 0.1`` becomes the second colour
            component.
        hue: First colour component, passed unchanged to all descendants.

    NOTE(review): Graphviz documents HSV components as values between 0 and
    1; the third component (85 / 50) and ``depth * 0.1`` for depths above 10
    lie outside that range - confirm this is intended.
    """
    edge_line = f'{p.dot_id()} -> {n.dot_id()} [color="{hue},{depth*0.1},85"]\n'
    node_line = f'{n.dot_id()} [color="{hue},{depth*0.1},50", shape="circle", width="0.1"]\n'

    subtree = [
        _geonames_node_to_dot(n, kid, depth+1, hue)
        for kid in n.children
    ]

    return edge_line + node_line + "".join(subtree)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Build the geonames trie, render it as DOT and store the result next to
    # the script as geo_dot.dot.
    dot_source = geonames_to_dot(load_geonames())
    with open("geo_dot.dot", "w") as dot_file:
        dot_file.write(dot_source)
|
BIN
trie.png
Normal file
BIN
trie.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 1.5 MiB |
Loading…
Reference in a new issue