Skip to content
This repository was archived by the owner on Feb 22, 2020. It is now read-only.

Commit 3aab341

Browse files
author
Han Xiao
authored
Merge pull request #311 from gnes-ai/feat-flow
feat(flow): first version of gnes flow
2 parents 12f7b70 + c5af930 commit 3aab341

File tree

18 files changed

+807
-36
lines changed

18 files changed

+807
-36
lines changed

gnes/cli/api.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,7 @@ def route(args):
4747

4848
def frontend(args):
4949
from ..service.frontend import FrontendService
50-
import threading
51-
with FrontendService(args):
52-
forever = threading.Event()
53-
forever.wait()
50+
_start_service(FrontendService, args)
5451

5552

5653
def client(args):

gnes/cli/parser.py

Lines changed: 25 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -47,15 +47,27 @@ def resolve_py_path(path):
4747
return path
4848

4949

50-
def resolve_yaml_path(path):
50+
def random_port(port, min_port=49152, max_port=65536):
    """Return ``port`` as an integer, or draw a random port when none is requested.

    A falsy ``port`` (``None``, ``0``, ``''``) or any value whose integer
    form is not positive is treated as "no port requested".

    :param port: requested port, as an int or anything ``int()`` accepts
    :param min_port: inclusive lower bound of the random range
    :param max_port: exclusive upper bound of the random range
    :return: the requested port as ``int``, or a random value in
        ``[min_port, max_port)``
    :raises ValueError: if ``port`` is truthy but cannot be converted by ``int()``
    """
    import random

    # convert once; a falsy value never reaches int() so None and '' are safe
    requested = int(port) if port else 0
    if requested <= 0:
        return random.randrange(min_port, max_port)
    return requested
57+
58+
59+
def resolve_yaml_path(path, to_stream=False):
5160
# priority, filepath > classname > default
5261
import os
5362
import io
5463
if hasattr(path, 'read'):
5564
# already a readable stream
5665
return path
5766
elif os.path.exists(path):
58-
return open(path, encoding='utf8')
67+
if to_stream:
68+
return open(path, encoding='utf8')
69+
else:
70+
return path
5971
elif path.isidentifier():
6072
# possible class name
6173
return io.StringIO('!%s {}' % path)
@@ -68,8 +80,9 @@ def resolve_yaml_path(path):
6880

6981

7082
def set_base_parser():
71-
from .. import __version__
83+
from .. import __version__, __proto_version__
7284
from termcolor import colored
85+
import os
7386
# create the top-level parser
7487
parser = argparse.ArgumentParser(
7588
description='%s, a cloud-native semantic search system '
@@ -79,7 +92,9 @@ def set_base_parser():
7992
colored('GNES v%s: Generic Neural Elastic Search' % __version__, 'green'),
8093
colored('https://gnes.ai', 'cyan', attrs=['underline'])),
8194
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
82-
parser.add_argument('-v', '--version', action='version', version='%(prog)s ' + __version__)
95+
parser.add_argument('-v', '--version', action='version',
96+
version='%(prog)s' + ': %s\nprotobuf: %s\nvcs_version: %s' %
97+
(__version__, __proto_version__, os.environ.get('GNES_VCS_VERSION', 'unknown')))
8398
parser.add_argument('--verbose', action='store_true', default=False,
8499
help='turn on detailed logging for debug')
85100
return parser
@@ -98,7 +113,7 @@ def set_composer_parser(parser=None):
98113
type=str,
99114
default='GNES app',
100115
help='name of the instance')
101-
parser.add_argument('--yaml_path', type=resolve_yaml_path,
116+
parser.add_argument('--yaml_path', type=lambda x: resolve_yaml_path(x, True),
102117
default=resource_stream(
103118
'gnes', '/'.join(('resources', 'compose', 'gnes-example.yml'))),
104119
help='yaml config of the service')
@@ -139,14 +154,14 @@ def set_composer_flask_parser(parser=None):
139154

140155
def set_service_parser(parser=None):
141156
from ..service.base import SocketType, BaseService, ParallelType
142-
import random
157+
143158
import os
144159
if not parser:
145160
parser = set_base_parser()
146-
min_port, max_port = 49152, 65536
147-
parser.add_argument('--port_in', type=int, default=random.randrange(min_port, max_port),
161+
162+
parser.add_argument('--port_in', type=int, default=random_port(-1),
148163
help='port for input data, default a random port between [49152, 65536]')
149-
parser.add_argument('--port_out', type=int, default=random.randrange(min_port, max_port),
164+
parser.add_argument('--port_out', type=int, default=random_port(-1),
150165
help='port for output data, default a random port between [49152, 65536]')
151166
parser.add_argument('--host_in', type=str, default=BaseService.default_host,
152167
help='host address for input')
@@ -158,8 +173,7 @@ def set_service_parser(parser=None):
158173
parser.add_argument('--socket_out', type=SocketType.from_string, choices=list(SocketType),
159174
default=SocketType.PUSH_BIND,
160175
help='socket type for output port')
161-
parser.add_argument('--port_ctrl', type=int,
162-
default=int(os.environ.get('GNES_CONTROL_PORT', random.randrange(min_port, max_port))),
176+
parser.add_argument('--port_ctrl', type=int, default=os.environ.get('GNES_CONTROL_PORT', random_port(-1)),
163177
help='port for controlling the service, default a random port between [49152, 65536]')
164178
parser.add_argument('--timeout', type=int, default=-1,
165179
help='timeout (ms) of all communication, -1 for waiting forever')

gnes/client/base.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -130,14 +130,14 @@ def __init__(self, args):
130130
)
131131
self.logger.info('waiting channel to be ready...')
132132
grpc.channel_ready_future(self._channel).result()
133-
self.logger.critical('gnes client ready!')
134133

135134
# create new stub
136135
self.logger.info('create new stub...')
137136
self._stub = gnes_pb2_grpc.GnesRPCStub(self._channel)
138137

139138
# attach response handler
140139
self.handler._context = self
140+
self.logger.critical('gnes client ready at %s:%d!' % (self.args.grpc_host, self.args.grpc_port))
141141

142142
def call(self, request):
143143
resp = self._stub.call(request)
@@ -158,13 +158,13 @@ def _handler_response_default(self, msg: 'gnes_pb2.Response'):
158158
pass
159159

160160
def __enter__(self):
161-
self.open()
161+
self.start()
162162
return self
163163

164164
def __exit__(self, exc_type, exc_val, exc_tb):
165165
self.close()
166166

167-
def open(self):
167+
def start(self):
168168
pass
169169

170170
def close(self):

gnes/client/cli.py

Lines changed: 34 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,33 @@
2626

2727

2828
class CLIClient(GrpcClient):
29-
def __init__(self, args):
29+
def __init__(self, args, start_at_init: bool = True):
3030
super().__init__(args)
31-
getattr(self, self.args.mode)()
32-
self.close()
31+
self._bytes_generator = self._get_bytes_generator_from_args(args)
32+
if start_at_init:
33+
self.start()
34+
35+
@staticmethod
36+
def _get_bytes_generator_from_args(args):
37+
if args.txt_file:
38+
all_bytes = (v.encode() for v in args.txt_file)
39+
elif args.image_zip_file:
40+
zipfile_ = zipfile.ZipFile(args.image_zip_file)
41+
all_bytes = (zipfile_.open(v).read() for v in zipfile_.namelist())
42+
elif args.video_zip_file:
43+
zipfile_ = zipfile.ZipFile(args.video_zip_file)
44+
all_bytes = (zipfile_.open(v).read() for v in zipfile_.namelist())
45+
else:
46+
all_bytes = None
47+
return all_bytes
48+
49+
def start(self):
50+
try:
51+
getattr(self, self.args.mode)()
52+
except Exception as ex:
53+
self.logger.error(ex)
54+
finally:
55+
self.close()
3356

3457
def train(self):
3558
with ProgressBar(task_name=self.args.mode) as p_bar:
@@ -64,18 +87,16 @@ def query_callback(self, req: 'gnes_pb2.Request', resp: 'gnes_pb2.Response'):
6487

6588
@property
6689
def bytes_generator(self) -> Generator[bytes, None, None]:
67-
if self.args.txt_file:
68-
all_bytes = (v.encode() for v in self.args.txt_file)
69-
elif self.args.image_zip_file:
70-
zipfile_ = zipfile.ZipFile(self.args.image_zip_file)
71-
all_bytes = (zipfile_.open(v).read() for v in zipfile_.namelist())
72-
elif self.args.video_zip_file:
73-
zipfile_ = zipfile.ZipFile(self.args.video_zip_file)
74-
all_bytes = (zipfile_.open(v).read() for v in zipfile_.namelist())
90+
if self._bytes_generator:
91+
return self._bytes_generator
7592
else:
76-
raise AttributeError('--txt_file, --image_zip_file, --video_zip_file one must be given')
93+
raise ValueError('bytes_generator is empty or not set')
7794

78-
return all_bytes
95+
@bytes_generator.setter
96+
def bytes_generator(self, bytes_gen: Generator[bytes, None, None]):
97+
if self._bytes_generator:
98+
self.logger.warning('bytes_generator is not empty, overrided')
99+
self._bytes_generator = bytes_gen
79100

80101

81102
class ProgressBar:

gnes/composer/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# COMPOSER WILL BE RETIRED IN A FUTURE VERSION!!!
2+
# COMPOSER WILL BE RETIRED IN A FUTURE VERSION!!!
3+
# COMPOSER WILL BE RETIRED IN A FUTURE VERSION!!!

gnes/encoder/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,8 @@
4545
'VggishEncoder': 'audio.vggish',
4646
'YouTube8MFeatureExtractor': 'video.yt8m_feature_extractor',
4747
'YouTube8MEncoder': 'video.yt8m_model',
48-
'QuantizerEncoder': 'numeric.quantizer'
48+
'QuantizerEncoder': 'numeric.quantizer',
49+
'CharEmbeddingEncoder': 'text.char'
4950
}
5051

5152
register_all_class(_cls2file_map, 'encoder')

gnes/encoder/text/char.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# Tencent is pleased to support the open source community by making GNES available.
2+
#
3+
# Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
17+
from typing import List
18+
19+
import numpy as np
20+
21+
from ..base import BaseTextEncoder
22+
from ...helper import batching, as_numpy_array
23+
24+
25+
class CharEmbeddingEncoder(BaseTextEncoder):
    """A random character embedding model. Only useful for testing.

    Each character is mapped to a row of a randomly initialised table and a
    sentence is represented by the mean of its character rows.
    """
    is_trained = True

    def __init__(self, dim: int = 128, *args, **kwargs):
        """
        :param dim: size of every character (and sentence) embedding
        """
        super().__init__(*args, **kwargs)
        self.dim = dim
        # code point that maps to row 0 of the table (the space character)
        self.offset = 32
        # row shared by every character outside [offset, offset + unknown_idx)
        self.unknown_idx = 96
        # 96 rows for code points 32..127 plus 1 row for unknown characters = 97
        self._char_embedding = np.random.random([self.unknown_idx + 1, dim])

    def _char_to_idx(self, char: str) -> int:
        """Return the embedding row for ``char``; out-of-range characters share ``unknown_idx``."""
        idx = ord(char) - self.offset
        return idx if 0 <= idx < self.unknown_idx else self.unknown_idx

    @batching
    @as_numpy_array
    def encode(self, text: List[str], *args, **kwargs) -> List[np.ndarray]:
        """Embed each sentence as the mean of its character embeddings.

        :param text: sentences to encode
        :return: one vector of length ``dim`` per sentence; an empty sentence
            yields a zero vector
        """
        sent_embed = []
        for sent in text:
            ids = [self._char_to_idx(c) for c in sent]
            if ids:
                sent_embed.append(np.mean(self._char_embedding[ids], axis=0))
            else:
                # the mean of an empty selection is NaN (with a RuntimeWarning)
                sent_embed.append(np.zeros(self.dim))
        return sent_embed

0 commit comments

Comments
 (0)