# bytetrie/examples/geonames.py

import csv
from bytetrie import ByteTrie
def load_geonames(path="cities500.txt"):
    """Build a multi-value trie from a tab-separated GeoNames dump.

    For every row, column 1 is encoded as UTF-8 and used as the key, and
    column 17 of the same row is stored as the value.

    Args:
        path: Location of the dump file. Defaults to ``"cities500.txt"``
            relative to the current working directory.

    Returns:
        The populated ``ByteTrie`` (created with ``multi_value=True``).

    Raises:
        OSError: If the file cannot be opened.
        Exception: Any error raised while handling a row is re-raised
            unchanged after the row number and label have been printed.
    """
    t = ByteTrie(multi_value=True)
    # The GeoNames readme specifies UTF-8; do not depend on the locale's
    # default encoding. newline="" is what the csv module asks for so that
    # it can do its own line-ending handling.
    with open(path, "r", encoding="utf-8", newline="") as f:
        # The dump is plain tab-delimited text without CSV quoting. With the
        # default dialect a literal '"' in a name would open a quoted field
        # and swallow the following tabs/newlines.
        reader = csv.reader(f, delimiter="\t", quoting=csv.QUOTE_NONE)
        for i, row in enumerate(reader):
            try:
                t.insert(row[1].encode("utf-8"), row[17])
            except Exception:
                # Short or empty rows must not raise a second IndexError
                # here and hide the diagnostics for the original error.
                label = row[1] if len(row) > 1 else None
                print(f"Error in row {i}")
                print(f"Label: '{label}'")
                print(f"Type: {type(label)}")
                raise
    return t
def insert(trie):
    """Return a helper that inserts strings into *trie*.

    The returned callable takes any number of strings. Each one is stored
    by calling ``trie.insert`` with the string's UTF-8 encoding as the key
    and the string itself as the value. Only pass ``str`` arguments.
    """

    def _insert(*strings):
        for text in strings:
            key = text.encode('utf-8')
            trie.insert(key, text)

    return _insert
def load_simple_trie():
    """Build a small trie from a fixed set of short ASCII strings.

    The trie is a ``ByteTrie`` created with default arguments. The strings
    are inserted through the helper returned by ``insert``, in the order
    they are listed below.
    """
    trie = ByteTrie()
    add = insert(trie)
    batches = (
        ("A",),
        ("AA", "AB"),
        ("ABCDE",),
        ("AACDEF", "AACDEGG", "AACDEH"),
    )
    for batch in batches:
        add(*batch)
    return trie
# This uses internal representations which are not supposed to
# be used as public API and are subject to change!
from bytetrie.bytetrie import ByteTrie, Node, Root, Child, Terminal
def geonames_to_dot(t: ByteTrie):
    """Render the trie *t* as the source text of a Graphviz ``strict digraph``.

    The output starts with a fixed header (graph, edge and node defaults plus
    a ``root`` node statement), followed by the text produced by
    ``_geonames_node_to_dot`` for every child of ``t.root``, and a closing
    brace. Hues are spread evenly over ``[0, 1)`` across the root's children;
    each child's hue is handed to ``_geonames_node_to_dot``.

    Args:
        t: The trie to render. Its ``root`` must expose ``children`` that is
            sized and iterable.

    Returns:
        The DOT source as a single string. For a trie whose root has no
        children this is just the header and the closing brace.
    """
    parts = ["""strict digraph {
graph [
bgcolor="transparent"
];
edge [
arrowhead="none",
penwidth="0.05",
];
node [
label="",
sep="2"
];
root [shape="circle", width="0.4"]
"""]
    children = t.root.children
    child_count = len(children)
    # An empty trie has no children: avoid dividing by zero and emit only
    # the header and closing brace.
    hue_inc = 1 / child_count if child_count else 0
    hue = 0
    for child in children:
        # Collect pieces and join once instead of growing one big string.
        parts.append(_geonames_node_to_dot(t.root, child, 1, hue))
        hue += hue_inc
    parts.append("}")
    return "".join(parts)
def _geonames_node_to_dot(p: Node, n: Child, depth, hue):
    """Return DOT statements for node *n*, its edge from *p*, and its subtree.

    Two lines are emitted for *n* itself: an edge ``p -> n`` and a node
    statement for *n*, both identified via ``dot_id()``. The colour strings
    use *hue* as the first component and ``depth * 0.1`` as the second. The
    function then recurses into ``n.children`` with ``depth + 1`` and the
    same *hue*, appending the results in iteration order.

    NOTE(review): Graphviz documents HSV colour components as lying in
    0..1; the fixed third components ``85`` and ``50`` are outside that
    range -- confirm this is intended.
    """
    saturation = depth * 0.1
    edge_line = f'{p.dot_id()} -> {n.dot_id()} [color="{hue},{saturation},85"]\n'
    node_line = f'{n.dot_id()} [color="{hue},{saturation},50", shape="circle", width="0.1"]\n'
    subtree = "".join(
        _geonames_node_to_dot(n, descendant, depth + 1, hue)
        for descendant in n.children
    )
    return edge_line + node_line + subtree
if __name__ == "__main__":
    # Script entry point: build the GeoNames trie, render it to DOT source,
    # and only then open the output file and write the result.
    dot_source = geonames_to_dot(load_geonames())
    with open("geo_dot.dot", "w") as out_file:
        out_file.write(dot_source)