Merge pull request #5 from zlove/master
Fixing counting of GSM 7-bit extended characters
This commit is contained in:
commit
b264ab16c9
@ -25,6 +25,12 @@ from sms_counter import SMSCounter
|
||||
>>> {'length': 29, 'messages': 1, 'remaining': 41, 'per_message': 70, 'encoding': 'UTF16'}
|
||||
```
|
||||
|
||||
The meaning of the `length`, `remaining` and `per_message` values returned by `SMSCounter.count()` depends on the encoding.
|
||||
|
||||
For GSM_7BIT_EX encoding, `length`, `remaining` and `per_message` count the number of 7-bit characters in the message, __including__ the escape character that must precede any characters in the "extended" character set. For example, the `length` of the message '€' is 2, because it takes two 7-bit characters to encode '€' in GSM_7BIT_EX.
|
||||
|
||||
For UTF16 and GSM_7BIT encoding, `length`, `remaining` and `per_message` count the number of characters (since all characters have an equal bit width).
|
||||
|
||||
#### Mentions
|
||||
|
||||
* Original idea : [danxexe/sms-counter](https://github.com/danxexe/sms-counter)
|
||||
|
@ -18,20 +18,20 @@ class SMSCounter(object):
|
||||
@classmethod
|
||||
def _get_gsm_7bit_map(cls):
|
||||
gsm_7bit_map = [
|
||||
10, 13, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46,
|
||||
47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
|
||||
64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
|
||||
81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 92, 95, 97, 98, 99, 100,
|
||||
101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113,
|
||||
114, 115, 116, 117, 118, 119, 120, 121, 122, 161, 163, 164, 165,
|
||||
191, 196, 197, 198, 199, 201, 209, 214, 216, 220, 223, 224, 228,
|
||||
229, 230, 232, 233, 236, 241, 242, 246, 248, 249, 252, 915, 916,
|
||||
920, 923, 926, 928, 931, 934, 936, 937]
|
||||
10, 12, 13, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
|
||||
46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
|
||||
63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
|
||||
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 92, 95, 97, 98, 99,
|
||||
100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
|
||||
113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 161, 163, 164,
|
||||
165, 191, 196, 197, 198, 199, 201, 209, 214, 216, 220, 223, 224,
|
||||
228, 229, 230, 232, 233, 236, 241, 242, 246, 248, 249, 252, 915,
|
||||
916, 920, 923, 926, 928, 931, 934, 936, 937]
|
||||
return gsm_7bit_map
|
||||
|
||||
@classmethod
|
||||
def _get_added_gsm_7bit_ex_map(cls):
|
||||
added_gsm_7bit_ex_map = [91, 92, 93, 94, 123, 124, 125, 126, 8364]
|
||||
added_gsm_7bit_ex_map = [12, 91, 92, 93, 94, 123, 124, 125, 126, 8364]
|
||||
return added_gsm_7bit_ex_map
|
||||
|
||||
@classmethod
|
||||
@ -59,11 +59,11 @@ class SMSCounter(object):
|
||||
def count(cls, plaintext):
|
||||
textlist = cls._text_to_unicode_pointcode_list(plaintext)
|
||||
|
||||
exchars = []
|
||||
encoding = cls._detect_encoding(plaintext)
|
||||
length = len(textlist)
|
||||
|
||||
if encoding == cls.GSM_7BIT_EX:
|
||||
exchars = [c for c in textlist if c in cls._get_added_gsm_7bit_ex_map()]
|
||||
lengthexchars = len(exchars)
|
||||
length += lengthexchars
|
||||
|
||||
|
28
tests.py
28
tests.py
@ -15,43 +15,53 @@ class SMSCounterTestCase(TestCase):
|
||||
def _check_gsm_7bit_sms_length(self, counter, message, message_max_length=GSM_7BIT_LEN):
|
||||
length_error = 'Message length should be less than {} symbols'.format(message_max_length)
|
||||
self.assertLessEqual(len(message), counter.get('per_message'), length_error)
|
||||
self.assertEqual(len(message), counter.get('length'))
|
||||
|
||||
textlist = SMSCounter._text_to_unicode_pointcode_list(message)
|
||||
exchars = [c for c in textlist if c in SMSCounter._get_added_gsm_7bit_ex_map()]
|
||||
|
||||
self.assertEqual(len(message) + len(exchars), counter.get('length'))
|
||||
self.assertEqual(counter.get('messages'), 1, 'Message is not multipart')
|
||||
|
||||
def test_gsm_7bit_chars(self):
|
||||
message = self.GSM_7BIT_CHAR_MAP[:self.GSM_7BIT_LEN]
|
||||
message = 'This is a message with only GSM 7-bit characters'
|
||||
counter = SMSCounter.count(message)
|
||||
self._check_gsm_7bit_sms_length(counter, message)
|
||||
self.assertEqual(counter.get('encoding'), SMSCounter.GSM_7BIT)
|
||||
|
||||
def test_gsm_7bit_ex_chars(self):
|
||||
message = self.GSM_7BIT_CHAR_MAP + self.GSM_7BIT_EX_CHAR_MAP
|
||||
message = message[:self.GSM_7BIT_LEN]
|
||||
message = 'This message has the GSM 7-bit extension characters appended ' + ''.join(self.GSM_7BIT_EX_CHAR_MAP)
|
||||
counter = SMSCounter.count(message)
|
||||
self._check_gsm_7bit_sms_length(counter, message)
|
||||
self.assertEqual(counter.get('encoding'), SMSCounter.GSM_7BIT_EX)
|
||||
|
||||
def test_gsm_7bit_ex_chars_count_double(self):
|
||||
message = self.GSM_7BIT_EX_CHAR_MAP[:5]
|
||||
counter = SMSCounter.count(message)
|
||||
self.assertEqual(counter.get('per_message'), self.GSM_7BIT_LEN)
|
||||
self.assertEqual(counter.get('length'), 10)
|
||||
self.assertEqual(counter.get('messages'), 1)
|
||||
|
||||
def test_utf_chars(self):
|
||||
utf_message = ['£', 'ф', '±'] + self.GSM_7BIT_CHAR_MAP
|
||||
utf_message = utf_message[:self.UTF16_LEN]
|
||||
utf_message = utf_message[:self.UTF16_LEN_MULTIPART]
|
||||
counter = SMSCounter.count(utf_message)
|
||||
self._check_gsm_7bit_sms_length(counter, utf_message, self.UTF16_LEN)
|
||||
self.assertEqual(counter.get('encoding'), SMSCounter.UTF16)
|
||||
self.assertEqual(counter.get('length'), len(utf_message))
|
||||
self.assertEqual(counter.get('messages'), 1)
|
||||
|
||||
def test_multipart_sms_gsm_7bit(self):
|
||||
message = self.GSM_7BIT_CHAR_MAP + self.GSM_7BIT_EX_CHAR_MAP
|
||||
double_message = message[:self.GSM_7BIT_LEN_MULTIPART] * 2
|
||||
counter = SMSCounter.count(double_message)
|
||||
self.assertEqual(counter.get('encoding'), SMSCounter.GSM_7BIT_EX)
|
||||
self.assertEqual(counter.get('messages'), 2, 'Message must contains from 2 parts')
|
||||
self.assertEqual(counter.get('messages'), 2, 'Message must contain 2 parts')
|
||||
|
||||
def test_multipart_sms_utf(self):
|
||||
utf_message = ['£', 'ф', '±'] + self.GSM_7BIT_CHAR_MAP
|
||||
double_utf_message = utf_message[:self.UTF16_LEN_MULTIPART] * 2
|
||||
counter = SMSCounter.count(double_utf_message)
|
||||
self.assertEqual(counter.get('encoding'), SMSCounter.UTF16)
|
||||
self.assertEqual(counter.get('messages'), 2, 'Message must contains from 2 parts')
|
||||
|
||||
self.assertEqual(counter.get('messages'), 2, 'Message must contain 2 parts')
|
||||
|
||||
if __name__ == '__main__':
|
||||
import unittest
|
||||
|
Loading…
Reference in New Issue
Block a user