diff --git a/src/lib/geventwebsocket/AUTHORS b/src/lib/geventwebsocket/AUTHORS new file mode 100644 index 00000000..02de7096 --- /dev/null +++ b/src/lib/geventwebsocket/AUTHORS @@ -0,0 +1,9 @@ +This Websocket library for Gevent is written and maintained by + + Jeffrey Gelens + + +Contributors: + + Denis Bilenko + Lon Ingram diff --git a/src/lib/geventwebsocket/LICENSE b/src/lib/geventwebsocket/LICENSE new file mode 100644 index 00000000..2526edb3 --- /dev/null +++ b/src/lib/geventwebsocket/LICENSE @@ -0,0 +1,13 @@ + Copyright 2011-2017 Jeffrey Gelens + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/src/lib/geventwebsocket/__init__.py b/src/lib/geventwebsocket/__init__.py index 7e2e0167..5ee3f961 100644 --- a/src/lib/geventwebsocket/__init__.py +++ b/src/lib/geventwebsocket/__init__.py @@ -1,4 +1,4 @@ -VERSION = (0, 9, 3, 'final', 0) +VERSION = (0, 10, 1, 'final', 0) __all__ = [ 'WebSocketApplication', diff --git a/src/lib/geventwebsocket/_compat.py b/src/lib/geventwebsocket/_compat.py new file mode 100644 index 00000000..70354135 --- /dev/null +++ b/src/lib/geventwebsocket/_compat.py @@ -0,0 +1,23 @@ +from __future__ import absolute_import, division, print_function + +import sys +import codecs + + +PY3 = sys.version_info[0] == 3 +PY2 = sys.version_info[0] == 2 + + +if PY2: + bytes = str + text_type = unicode + string_types = basestring + range_type = xrange + iteritems = lambda x: x.iteritems() + # b = lambda x: x +else: + text_type = str + string_types = str, + range_type = range + iteritems = lambda x: iter(x.items()) + # b = lambda x: codecs.latin_1_encode(x)[0] diff --git a/src/lib/geventwebsocket/handler.py b/src/lib/geventwebsocket/handler.py index c40b03c1..8aec77c0 100644 --- a/src/lib/geventwebsocket/handler.py +++ b/src/lib/geventwebsocket/handler.py @@ -1,10 +1,8 @@ -# Modified: Werkzeug Debugger workaround in run_websocket(self): - import base64 import hashlib -import warnings from gevent.pywsgi import WSGIHandler +from ._compat import PY3 from .websocket import WebSocket, Stream from .logging import create_logger @@ -51,10 +49,7 @@ class WebSocketHandler(WSGIHandler): try: self.server.clients[self.client_address] = Client( self.client_address, self.websocket) - if self.application.__class__.__name__ == "DebuggedApplication": # Modified: Werkzeug Debugger workaround (https://bitbucket.org/Jeffrey/gevent-websocket/issue/53/if-the-application-returns-a-generator-we) - list(self.application(self.environ, lambda s, h: [])) - else: - self.application(self.environ, lambda s, h: []) + list(self.application(self.environ, lambda s, h, e=None: [])) finally: del self.server.clients[self.client_address] if not self.websocket.closed: @@ -65,8 +60,7 @@ class WebSocketHandler(WSGIHandler): self.websocket = None def run_application(self): - if (hasattr(self.server, 'pre_start_hook') - and self.server.pre_start_hook): + if (hasattr(self.server, 'pre_start_hook') and self.server.pre_start_hook): self.logger.debug("Calling pre-start hook") if self.server.pre_start_hook(self): return super(WebSocketHandler, self).run_application() @@ -126,7 +120,7 @@ class WebSocketHandler(WSGIHandler): if self.request_version != 'HTTP/1.1': self.start_response('402 Bad Request', []) - self.logger.warning("Bad server protocol in headers: %s" % self.request_version) + self.logger.warning("Bad server protocol in headers") return ['Bad protocol version'] @@ -217,11 +211,17 @@ class WebSocketHandler(WSGIHandler): 'wsgi.websocket': self.websocket }) + if PY3: + accept = base64.b64encode( + hashlib.sha1((key + self.GUID).encode("latin-1")).digest() + ).decode("latin-1") + else: + accept = base64.b64encode(hashlib.sha1(key + self.GUID).digest()) + headers = [ ("Upgrade", "websocket"), ("Connection", "Upgrade"), - ("Sec-WebSocket-Accept", base64.b64encode( - hashlib.sha1(key + self.GUID).digest())), + ("Sec-WebSocket-Accept", accept) ] if protocol: @@ -238,7 +238,7 @@ class WebSocketHandler(WSGIHandler): return self.server.logger def log_request(self): - if '101' not in self.status: + if '101' not in str(self.status): self.logger.info(self.format_request()) @property diff --git a/src/lib/geventwebsocket/logging.py b/src/lib/geventwebsocket/logging.py index ac0c9692..554ca02d 100644 --- a/src/lib/geventwebsocket/logging.py +++ b/src/lib/geventwebsocket/logging.py @@ -1,6 +1,6 @@ from __future__ import absolute_import -from logging import getLogger, StreamHandler, getLoggerClass, Formatter, DEBUG, INFO +from logging import getLogger, StreamHandler, getLoggerClass, Formatter, DEBUG def create_logger(name, debug=False, format=None): @@ -27,6 +27,5 @@ def create_logger(name, debug=False, format=None): del logger.handlers[:] logger.__class__ = DebugLogger logger.addHandler(handler) - logger.setLevel(INFO) return logger diff --git a/src/lib/geventwebsocket/protocols/wamp.py b/src/lib/geventwebsocket/protocols/wamp.py index b5586537..c89775be 100644 --- a/src/lib/geventwebsocket/protocols/wamp.py +++ b/src/lib/geventwebsocket/protocols/wamp.py @@ -11,6 +11,7 @@ except ImportError: except ImportError: import json +from .._compat import range_type, string_types from ..exceptions import WebSocketError from .base import BaseProtocol @@ -131,7 +132,7 @@ class WampProtocol(BaseProtocol): self.prefixes = Prefixes() self.session_id = ''.join( [random.choice(string.digits + string.letters) - for i in xrange(16)]) + for i in range_type(16)]) super(WampProtocol, self).__init__(*args, **kwargs) @@ -168,9 +169,9 @@ class WampProtocol(BaseProtocol): call_id, curie_or_uri = data[1:3] args = data[3:] - if not isinstance(call_id, (str, unicode)): + if not isinstance(call_id, string_types): raise Exception() - if not isinstance(curie_or_uri, (str, unicode)): + if not isinstance(curie_or_uri, string_types): raise Exception() uri = self.prefixes.resolve(curie_or_uri) @@ -178,7 +179,7 @@ class WampProtocol(BaseProtocol): try: result = self.procedures.call(uri, args) result_msg = [self.MSG_CALL_RESULT, call_id, result] - except Exception, e: + except Exception as e: result_msg = [self.MSG_CALL_ERROR, call_id] + self._get_exception_info(e) @@ -190,7 +191,7 @@ class WampProtocol(BaseProtocol): if not isinstance(action, int): raise Exception() - if not isinstance(curie_or_uri, (str, unicode)): + if not isinstance(curie_or_uri, string_types): raise Exception() uri = self.prefixes.resolve(curie_or_uri) diff --git a/src/lib/geventwebsocket/resource.py b/src/lib/geventwebsocket/resource.py index 36c1fb36..549f0d32 100644 --- a/src/lib/geventwebsocket/resource.py +++ b/src/lib/geventwebsocket/resource.py @@ -1,8 +1,15 @@ import re +import warnings from .protocols.base import BaseProtocol from .exceptions import WebSocketError +try: + from collections import OrderedDict +except ImportError: + class OrderedDict: + pass + class WebSocketApplication(object): protocol_class = BaseProtocol @@ -41,15 +48,33 @@ class Resource(object): def __init__(self, apps=None): self.apps = apps if apps else [] - def _app_by_path(self, environ_path): - # Which app matched the current path? + if isinstance(apps, dict): + if not isinstance(apps, OrderedDict): + warnings.warn("Using an unordered dictionary for the " + "app list is discouraged and may lead to " + "undefined behavior.", UserWarning) - for path, app in self.apps.iteritems(): + self.apps = apps.items() + + # An app can either be a standard WSGI application (an object we call with + # __call__(self, environ, start_response)) or a class we instantiate + # (and which can handle websockets). This function tells them apart. + # Override this if you have apps that can handle websockets but don't + # fulfill these criteria. + def _is_websocket_app(self, app): + return isinstance(app, type) and issubclass(app, WebSocketApplication) + + def _app_by_path(self, environ_path, is_websocket_request): + # Which app matched the current path? + for path, app in self.apps: if re.match(path, environ_path): - return app + if is_websocket_request == self._is_websocket_app(app): + return app + return None def app_protocol(self, path): - app = self._app_by_path(path) + # app_protocol will only be called for websocket apps + app = self._app_by_path(path, True) if hasattr(app, 'protocol_name'): return app.protocol_name() @@ -58,17 +83,18 @@ class Resource(object): def __call__(self, environ, start_response): environ = environ - current_app = self._app_by_path(environ['PATH_INFO']) + is_websocket_call = 'wsgi.websocket' in environ + current_app = self._app_by_path(environ['PATH_INFO'], is_websocket_call) if current_app is None: raise Exception("No apps defined") - if 'wsgi.websocket' in environ: + if is_websocket_call: ws = environ['wsgi.websocket'] current_app = current_app(ws) current_app.ws = ws # TODO: needed? current_app.handle() - - return None + # Always return something, calling WSGI middleware may rely on it + return [] else: return current_app(environ, start_response) diff --git a/src/lib/geventwebsocket/server.py b/src/lib/geventwebsocket/server.py index 00443b8a..e939bd11 100644 --- a/src/lib/geventwebsocket/server.py +++ b/src/lib/geventwebsocket/server.py @@ -5,6 +5,7 @@ from .logging import create_logger class WebSocketServer(WSGIServer): + handler_class = WebSocketHandler debug_log_format = ( '-' * 80 + '\n' + '%(levelname)s in %(module)s [%(pathname)s:%(lineno)d]:\n' + @@ -18,7 +19,6 @@ class WebSocketServer(WSGIServer): self._logger = None self.clients = {} - kwargs['handler_class'] = WebSocketHandler super(WebSocketServer, self).__init__(*args, **kwargs) def handle(self, socket, address): diff --git a/src/lib/geventwebsocket/utf8validator.py b/src/lib/geventwebsocket/utf8validator.py index b8a3e8a5..d604f966 100644 --- a/src/lib/geventwebsocket/utf8validator.py +++ b/src/lib/geventwebsocket/utf8validator.py @@ -1,128 +1,224 @@ +from ._compat import PY3 + ############################################################################### -## -## Copyright 2011-2013 Tavendo GmbH -## -## Note: -## -## This code is a Python implementation of the algorithm -## -## "Flexible and Economical UTF-8 Decoder" -## -## by Bjoern Hoehrmann -## -## bjoern@hoehrmann.de -## http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ -## -## Licensed under the Apache License, Version 2.0 (the "License"); -## you may not use this file except in compliance with the License. -## You may obtain a copy of the License at -## -## http://www.apache.org/licenses/LICENSE-2.0 -## -## Unless required by applicable law or agreed to in writing, software -## distributed under the License is distributed on an "AS IS" BASIS, -## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -## See the License for the specific language governing permissions and -## limitations under the License. -## +# +# The MIT License (MIT) +# +# Copyright (c) Crossbar.io Technologies GmbH +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# ############################################################################### +# Note: This code is a Python implementation of the algorithm +# "Flexible and Economical UTF-8 Decoder" by Bjoern Hoehrmann +# bjoern@hoehrmann.de, http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ -## use Cython implementation of UTF8 validator if available -## +__all__ = ("Utf8Validator",) + + +# DFA transitions +UTF8VALIDATOR_DFA = ( + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, # 00..1f + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, # 20..3f + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, # 40..5f + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, # 60..7f + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, # 80..9f + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, # a0..bf + 8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, # c0..df + 0xa, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x4, 0x3, 0x3, # e0..ef + 0xb, 0x6, 0x6, 0x6, 0x5, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, # f0..ff + 0x0, 0x1, 0x2, 0x3, 0x5, 0x8, 0x7, 0x1, 0x1, 0x1, 0x4, 0x6, 0x1, 0x1, 0x1, 0x1, # s0..s0 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, # s1..s2 + 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, # s3..s4 + 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, # s5..s6 + 1, 3, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, # s7..s8 +) + +UTF8_ACCEPT = 0 +UTF8_REJECT = 1 + + +# use Cython implementation of UTF8 validator if available +# try: from wsaccel.utf8validator import Utf8Validator -except: - ## fallback to pure Python implementation - class Utf8Validator: - """ - Incremental UTF-8 validator with constant memory consumption (minimal - state). +except ImportError: + # + # Fallback to pure Python implementation - also for PyPy. + # + # Do NOT touch this code unless you know what you are doing! + # https://github.com/oberstet/scratchbox/tree/master/python/utf8 + # - Implements the algorithm "Flexible and Economical UTF-8 Decoder" by - Bjoern Hoehrmann (http://bjoern.hoehrmann.de/utf-8/decoder/dfa/). - """ + if PY3: - ## DFA transitions - UTF8VALIDATOR_DFA = [ - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 00..1f - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 20..3f - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 40..5f - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, # 60..7f - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, # 80..9f - 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, # a0..bf - 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, # c0..df - 0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, # e0..ef - 0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, # f0..ff - 0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, # s0..s0 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, # s1..s2 - 1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, # s3..s4 - 1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, # s5..s6 - 1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, # s7..s8 - ] + # Python 3 and above - UTF8_ACCEPT = 0 - UTF8_REJECT = 1 + # convert DFA table to bytes (performance) + UTF8VALIDATOR_DFA_S = bytes(UTF8VALIDATOR_DFA) - def __init__(self): - self.reset() - - def decode(self, b): + class Utf8Validator(object): """ - Eat one UTF-8 octet, and validate on the fly. + Incremental UTF-8 validator with constant memory consumption (minimal state). - Returns UTF8_ACCEPT when enough octets have been consumed, in which case - self.codepoint contains the decoded Unicode code point. - - Returns UTF8_REJECT when invalid UTF-8 was encountered. - - Returns some other positive integer when more octets need to be eaten. - """ - type = Utf8Validator.UTF8VALIDATOR_DFA[b] - - if self.state != Utf8Validator.UTF8_ACCEPT: - self.codepoint = (b & 0x3f) | (self.codepoint << 6) - else: - self.codepoint = (0xff >> type) & b - - self.state = Utf8Validator.UTF8VALIDATOR_DFA[256 + self.state * 16 + type] - - return self.state - - def reset(self): - """ - Reset validator to start new incremental UTF-8 decode/validation. - """ - self.state = Utf8Validator.UTF8_ACCEPT - self.codepoint = 0 - self.i = 0 - - def validate(self, ba): - """ - Incrementally validate a chunk of bytes provided as string. - - Will return a quad (valid?, endsOnCodePoint?, currentIndex, totalIndex). - - As soon as an octet is encountered which renders the octet sequence - invalid, a quad with valid? == False is returned. currentIndex returns - the index within the currently consumed chunk, and totalIndex the - index within the total consumed sequence that was the point of bail out. - When valid? == True, currentIndex will be len(ba) and totalIndex the - total amount of consumed bytes. + Implements the algorithm "Flexible and Economical UTF-8 Decoder" by + Bjoern Hoehrmann (http://bjoern.hoehrmann.de/utf-8/decoder/dfa/). """ - l = len(ba) + def __init__(self): + self.reset() - for i in xrange(l): - ## optimized version of decode(), since we are not interested in actual code points + def decode(self, b): + """ + Eat one UTF-8 octet, and validate on the fly. - self.state = Utf8Validator.UTF8VALIDATOR_DFA[256 + (self.state << 4) + Utf8Validator.UTF8VALIDATOR_DFA[ord(ba[i])]] + Returns ``UTF8_ACCEPT`` when enough octets have been consumed, in which case + ``self.codepoint`` contains the decoded Unicode code point. - if self.state == Utf8Validator.UTF8_REJECT: - self.i += i - return False, False, i, self.i + Returns ``UTF8_REJECT`` when invalid UTF-8 was encountered. - self.i += l + Returns some other positive integer when more octets need to be eaten. + """ + tt = UTF8VALIDATOR_DFA_S[b] + if self.state != UTF8_ACCEPT: + self.codepoint = (b & 0x3f) | (self.codepoint << 6) + else: + self.codepoint = (0xff >> tt) & b + self.state = UTF8VALIDATOR_DFA_S[256 + self.state * 16 + tt] + return self.state - return True, self.state == Utf8Validator.UTF8_ACCEPT, l, self.i + def reset(self): + """ + Reset validator to start new incremental UTF-8 decode/validation. + """ + self.state = UTF8_ACCEPT # the empty string is valid UTF8 + self.codepoint = 0 + self.i = 0 + + def validate(self, ba): + """ + Incrementally validate a chunk of bytes provided as string. + + Will return a quad ``(valid?, endsOnCodePoint?, currentIndex, totalIndex)``. + + As soon as an octet is encountered which renders the octet sequence + invalid, a quad with ``valid? == False`` is returned. ``currentIndex`` returns + the index within the currently consumed chunk, and ``totalIndex`` the + index within the total consumed sequence that was the point of bail out. + When ``valid? == True``, currentIndex will be ``len(ba)`` and ``totalIndex`` the + total amount of consumed bytes. + """ + # + # The code here is written for optimal JITting in PyPy, not for best + # readability by your grandma or particular elegance. Do NOT touch! + # + l = len(ba) + i = 0 + state = self.state + while i < l: + # optimized version of decode(), since we are not interested in actual code points + state = UTF8VALIDATOR_DFA_S[256 + (state << 4) + UTF8VALIDATOR_DFA_S[ba[i]]] + if state == UTF8_REJECT: + self.state = state + self.i += i + return False, False, i, self.i + i += 1 + self.state = state + self.i += l + return True, state == UTF8_ACCEPT, l, self.i + + else: + + # convert DFA table to string (performance) + UTF8VALIDATOR_DFA_S = ''.join([chr(c) for c in UTF8VALIDATOR_DFA]) + + class Utf8Validator(object): + """ + Incremental UTF-8 validator with constant memory consumption (minimal state). + + Implements the algorithm "Flexible and Economical UTF-8 Decoder" by + Bjoern Hoehrmann (http://bjoern.hoehrmann.de/utf-8/decoder/dfa/). + """ + + def __init__(self): + self.reset() + + def decode(self, b): + """ + Eat one UTF-8 octet, and validate on the fly. + + Returns ``UTF8_ACCEPT`` when enough octets have been consumed, in which case + ``self.codepoint`` contains the decoded Unicode code point. + + Returns ``UTF8_REJECT`` when invalid UTF-8 was encountered. + + Returns some other positive integer when more octets need to be eaten. + """ + tt = ord(UTF8VALIDATOR_DFA_S[b]) + if self.state != UTF8_ACCEPT: + self.codepoint = (b & 0x3f) | (self.codepoint << 6) + else: + self.codepoint = (0xff >> tt) & b + self.state = ord(UTF8VALIDATOR_DFA_S[256 + self.state * 16 + tt]) + return self.state + + def reset(self): + """ + Reset validator to start new incremental UTF-8 decode/validation. + """ + self.state = UTF8_ACCEPT # the empty string is valid UTF8 + self.codepoint = 0 + self.i = 0 + + def validate(self, ba): + """ + Incrementally validate a chunk of bytes provided as string. + + Will return a quad ``(valid?, endsOnCodePoint?, currentIndex, totalIndex)``. + + As soon as an octet is encountered which renders the octet sequence + invalid, a quad with ``valid? == False`` is returned. ``currentIndex`` returns + the index within the currently consumed chunk, and ``totalIndex`` the + index within the total consumed sequence that was the point of bail out. + When ``valid? == True``, currentIndex will be ``len(ba)`` and ``totalIndex`` the + total amount of consumed bytes. + """ + # + # The code here is written for optimal JITting in PyPy, not for best + # readability by your grandma or particular elegance. Do NOT touch! + # + l = len(ba) + i = 0 + state = self.state + while i < l: + # optimized version of decode(), since we are not interested in actual code points + try: + state = ord(UTF8VALIDATOR_DFA_S[256 + (state << 4) + ord(UTF8VALIDATOR_DFA_S[ba[i]])]) + except: + import ipdb; ipdb.set_trace() + if state == UTF8_REJECT: + self.state = state + self.i += i + return False, False, i, self.i + i += 1 + self.state = state + self.i += l + return True, state == UTF8_ACCEPT, l, self.i diff --git a/src/lib/geventwebsocket/websocket.py b/src/lib/geventwebsocket/websocket.py index 6d4f76d3..7aad7698 100644 --- a/src/lib/geventwebsocket/websocket.py +++ b/src/lib/geventwebsocket/websocket.py @@ -1,11 +1,10 @@ import struct +import socket -from socket import error - +from ._compat import string_types, range_type, text_type from .exceptions import ProtocolError from .exceptions import WebSocketError from .exceptions import FrameTooLargeException - from .utf8validator import Utf8Validator @@ -62,7 +61,7 @@ class WebSocket(object): """ if not bytestring: - return u'' + return '' try: return bytestring.decode('utf-8') @@ -76,13 +75,10 @@ class WebSocket(object): :returns: The utf-8 byte string equivalent of `text`. """ - if isinstance(text, str): - return text + if not isinstance(text, str): + text = text_type(text or '') - if not isinstance(text, unicode): - text = unicode(text or '') - - return text.encode('utf-8') + return text.encode("utf-8") def _is_valid_close_code(self, code): """ @@ -166,7 +162,7 @@ class WebSocket(object): raise ProtocolError('Invalid close frame: {0} {1}'.format( header, payload)) - code = struct.unpack('!H', str(payload[:2]))[0] + code = struct.unpack('!H', payload[:2])[0] payload = payload[2:] if payload: @@ -203,15 +199,15 @@ class WebSocket(object): raise ProtocolError if not header.length: - return header, '' + return header, b'' try: payload = self.raw_read(header.length) - except error: - payload = '' + except socket.error: + payload = b'' except Exception: # TODO log out this exception - payload = '' + payload = b'' if len(payload) != header.length: raise WebSocketError('Unexpected EOF reading frame payload') @@ -238,7 +234,7 @@ class WebSocket(object): if an exception is called. Use `receive` instead. """ opcode = None - message = "" + message = bytearray() while True: header, payload = self.read_frame() @@ -286,9 +282,9 @@ class WebSocket(object): if opcode == self.OPCODE_TEXT: self.validate_utf8(message) - return message + return self._decode_bytes(message) else: - return bytearray(message) + return message def receive(self): """ @@ -306,7 +302,10 @@ class WebSocket(object): self.close(1007) except ProtocolError: self.close(1002) - except error: + except socket.timeout: + self.close() + self.current_app.on_close(MSG_CLOSED) + except socket.error: self.close() self.current_app.on_close(MSG_CLOSED) @@ -320,24 +319,29 @@ class WebSocket(object): self.current_app.on_close(MSG_ALREADY_CLOSED) raise WebSocketError(MSG_ALREADY_CLOSED) - if opcode == self.OPCODE_TEXT: + if not message: + return + + if opcode in (self.OPCODE_TEXT, self.OPCODE_PING): message = self._encode_bytes(message) elif opcode == self.OPCODE_BINARY: - message = str(message) + message = bytes(message) - header = Header.encode_header(True, opcode, '', len(message), 0) + header = Header.encode_header(True, opcode, b'', len(message), 0) try: self.raw_write(header + message) - except error: + except socket.error: raise WebSocketError(MSG_SOCKET_DEAD) + except: + raise def send(self, message, binary=None): """ Send a frame over the websocket with message as its payload """ if binary is None: - binary = not isinstance(message, (str, unicode)) + binary = not isinstance(message, string_types) opcode = self.OPCODE_BINARY if binary else self.OPCODE_TEXT @@ -347,7 +351,7 @@ class WebSocket(object): self.current_app.on_close(MSG_SOCKET_DEAD) raise WebSocketError(MSG_SOCKET_DEAD) - def close(self, code=1000, message=''): + def close(self, code=1000, message=b''): """ Close the websocket and connection, sending the specified code and message. The underlying socket object is _not_ closed, that is the @@ -360,9 +364,7 @@ class WebSocket(object): try: message = self._encode_bytes(message) - self.send_frame( - struct.pack('!H%ds' % len(message), code, message), - opcode=self.OPCODE_CLOSE) + self.send_frame(message, opcode=self.OPCODE_CLOSE) except WebSocketError: # Failed to write the closing frame but it's ok because we're # closing the socket anyway. @@ -420,18 +422,37 @@ class Header(object): payload = bytearray(payload) mask = bytearray(self.mask) - for i in xrange(self.length): + for i in range_type(self.length): payload[i] ^= mask[i % 4] - return str(payload) + return payload # it's the same operation unmask_payload = mask_payload def __repr__(self): - return ("
").format(self.fin, self.opcode, self.length, - self.flags, id(self)) + opcodes = { + 0: 'continuation(0)', + 1: 'text(1)', + 2: 'binary(2)', + 8: 'close(8)', + 9: 'ping(9)', + 10: 'pong(10)' + } + flags = { + 0x40: 'RSV1 MASK', + 0x20: 'RSV2 MASK', + 0x10: 'RSV3 MASK' + } + + return ("
").format( + self.fin, + opcodes.get(self.opcode, 'reserved({})'.format(self.opcode)), + self.length, + flags.get(self.flags, 'reserved({})'.format(self.flags)), + self.mask, id(self) + ) @classmethod def decode_header(cls, stream): @@ -509,7 +530,8 @@ class Header(object): """ first_byte = opcode second_byte = 0 - extra = '' + extra = b"" + result = bytearray() if fin: first_byte |= cls.FIN_MASK @@ -538,6 +560,11 @@ class Header(object): if mask: second_byte |= cls.MASK_MASK - extra += mask + result.append(first_byte) + result.append(second_byte) + result.extend(extra) - return chr(first_byte) + chr(second_byte) + extra + if mask: + result.extend(mask) + + return result