Merge pull request #5 from zlove/master
Fixing counting of GSM 7-bit extended characters
This commit is contained in:
commit
b264ab16c9
@ -25,6 +25,12 @@ from sms_counter import SMSCounter
|
|||||||
>>> {'length': 29, 'messages': 1, 'remaining': 41, 'per_message': 70, 'encoding': 'UTF16'}
|
>>> {'length': 29, 'messages': 1, 'remaining': 41, 'per_message': 70, 'encoding': 'UTF16'}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
The meaning of the `length`, `remaining` and `per_message` values returned by `SMSCounter.count()` depends on the encoding.
|
||||||
|
|
||||||
|
For GSM_7BIT_EX encoding, `length`, `remaining` and `per_message` count the number of 7-bit characters in the message, __including__ the escape character that must precede any character in the "extended" character set. For example, the `length` of the message '€' is 2, because it takes two 7-bit characters to encode '€' in GSM_7BIT_EX.
|
||||||
|
|
||||||
|
For the UTF16 and GSM_7BIT encodings, `length`, `remaining` and `per_message` count the number of characters (since all characters have the same bit width).
|
||||||
|
|
||||||
#### Mentions
|
#### Mentions
|
||||||
|
|
||||||
* Original idea : [danxexe/sms-counter](https://github.com/danxexe/sms-counter)
|
* Original idea : [danxexe/sms-counter](https://github.com/danxexe/sms-counter)
|
||||||
|
@ -18,20 +18,20 @@ class SMSCounter(object):
|
|||||||
@classmethod
|
@classmethod
|
||||||
def _get_gsm_7bit_map(cls):
|
def _get_gsm_7bit_map(cls):
|
||||||
gsm_7bit_map = [
|
gsm_7bit_map = [
|
||||||
10, 13, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46,
|
10, 12, 13, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
|
||||||
47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
|
46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
|
||||||
64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
|
63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
|
||||||
81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 92, 95, 97, 98, 99, 100,
|
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 92, 95, 97, 98, 99,
|
||||||
101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113,
|
100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
|
||||||
114, 115, 116, 117, 118, 119, 120, 121, 122, 161, 163, 164, 165,
|
113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 161, 163, 164,
|
||||||
191, 196, 197, 198, 199, 201, 209, 214, 216, 220, 223, 224, 228,
|
165, 191, 196, 197, 198, 199, 201, 209, 214, 216, 220, 223, 224,
|
||||||
229, 230, 232, 233, 236, 241, 242, 246, 248, 249, 252, 915, 916,
|
228, 229, 230, 232, 233, 236, 241, 242, 246, 248, 249, 252, 915,
|
||||||
920, 923, 926, 928, 931, 934, 936, 937]
|
916, 920, 923, 926, 928, 931, 934, 936, 937]
|
||||||
return gsm_7bit_map
|
return gsm_7bit_map
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _get_added_gsm_7bit_ex_map(cls):
|
def _get_added_gsm_7bit_ex_map(cls):
|
||||||
added_gsm_7bit_ex_map = [91, 92, 93, 94, 123, 124, 125, 126, 8364]
|
added_gsm_7bit_ex_map = [12, 91, 92, 93, 94, 123, 124, 125, 126, 8364]
|
||||||
return added_gsm_7bit_ex_map
|
return added_gsm_7bit_ex_map
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@ -59,11 +59,11 @@ class SMSCounter(object):
|
|||||||
def count(cls, plaintext):
|
def count(cls, plaintext):
|
||||||
textlist = cls._text_to_unicode_pointcode_list(plaintext)
|
textlist = cls._text_to_unicode_pointcode_list(plaintext)
|
||||||
|
|
||||||
exchars = []
|
|
||||||
encoding = cls._detect_encoding(plaintext)
|
encoding = cls._detect_encoding(plaintext)
|
||||||
length = len(textlist)
|
length = len(textlist)
|
||||||
|
|
||||||
if encoding == cls.GSM_7BIT_EX:
|
if encoding == cls.GSM_7BIT_EX:
|
||||||
|
exchars = [c for c in textlist if c in cls._get_added_gsm_7bit_ex_map()]
|
||||||
lengthexchars = len(exchars)
|
lengthexchars = len(exchars)
|
||||||
length += lengthexchars
|
length += lengthexchars
|
||||||
|
|
||||||
|
28
tests.py
28
tests.py
@ -15,43 +15,53 @@ class SMSCounterTestCase(TestCase):
|
|||||||
def _check_gsm_7bit_sms_length(self, counter, message, message_max_length=GSM_7BIT_LEN):
|
def _check_gsm_7bit_sms_length(self, counter, message, message_max_length=GSM_7BIT_LEN):
|
||||||
length_error = 'Message length should be less than {} symbols'.format(message_max_length)
|
length_error = 'Message length should be less than {} symbols'.format(message_max_length)
|
||||||
self.assertLessEqual(len(message), counter.get('per_message'), length_error)
|
self.assertLessEqual(len(message), counter.get('per_message'), length_error)
|
||||||
self.assertEqual(len(message), counter.get('length'))
|
|
||||||
|
textlist = SMSCounter._text_to_unicode_pointcode_list(message)
|
||||||
|
exchars = [c for c in textlist if c in SMSCounter._get_added_gsm_7bit_ex_map()]
|
||||||
|
|
||||||
|
self.assertEqual(len(message) + len(exchars), counter.get('length'))
|
||||||
self.assertEqual(counter.get('messages'), 1, 'Message is not multipart')
|
self.assertEqual(counter.get('messages'), 1, 'Message is not multipart')
|
||||||
|
|
||||||
def test_gsm_7bit_chars(self):
|
def test_gsm_7bit_chars(self):
|
||||||
message = self.GSM_7BIT_CHAR_MAP[:self.GSM_7BIT_LEN]
|
message = 'This is a message with only GSM 7-bit characters'
|
||||||
counter = SMSCounter.count(message)
|
counter = SMSCounter.count(message)
|
||||||
self._check_gsm_7bit_sms_length(counter, message)
|
self._check_gsm_7bit_sms_length(counter, message)
|
||||||
self.assertEqual(counter.get('encoding'), SMSCounter.GSM_7BIT)
|
self.assertEqual(counter.get('encoding'), SMSCounter.GSM_7BIT)
|
||||||
|
|
||||||
def test_gsm_7bit_ex_chars(self):
|
def test_gsm_7bit_ex_chars(self):
|
||||||
message = self.GSM_7BIT_CHAR_MAP + self.GSM_7BIT_EX_CHAR_MAP
|
message = 'This message has the GSM 7-bit extension characters appended ' + ''.join(self.GSM_7BIT_EX_CHAR_MAP)
|
||||||
message = message[:self.GSM_7BIT_LEN]
|
|
||||||
counter = SMSCounter.count(message)
|
counter = SMSCounter.count(message)
|
||||||
self._check_gsm_7bit_sms_length(counter, message)
|
self._check_gsm_7bit_sms_length(counter, message)
|
||||||
self.assertEqual(counter.get('encoding'), SMSCounter.GSM_7BIT_EX)
|
self.assertEqual(counter.get('encoding'), SMSCounter.GSM_7BIT_EX)
|
||||||
|
|
||||||
|
def test_gsm_7bit_ex_chars_count_double(self):
|
||||||
|
message = self.GSM_7BIT_EX_CHAR_MAP[:5]
|
||||||
|
counter = SMSCounter.count(message)
|
||||||
|
self.assertEqual(counter.get('per_message'), self.GSM_7BIT_LEN)
|
||||||
|
self.assertEqual(counter.get('length'), 10)
|
||||||
|
self.assertEqual(counter.get('messages'), 1)
|
||||||
|
|
||||||
def test_utf_chars(self):
|
def test_utf_chars(self):
|
||||||
utf_message = ['£', 'ф', '±'] + self.GSM_7BIT_CHAR_MAP
|
utf_message = ['£', 'ф', '±'] + self.GSM_7BIT_CHAR_MAP
|
||||||
utf_message = utf_message[:self.UTF16_LEN]
|
utf_message = utf_message[:self.UTF16_LEN_MULTIPART]
|
||||||
counter = SMSCounter.count(utf_message)
|
counter = SMSCounter.count(utf_message)
|
||||||
self._check_gsm_7bit_sms_length(counter, utf_message, self.UTF16_LEN)
|
|
||||||
self.assertEqual(counter.get('encoding'), SMSCounter.UTF16)
|
self.assertEqual(counter.get('encoding'), SMSCounter.UTF16)
|
||||||
|
self.assertEqual(counter.get('length'), len(utf_message))
|
||||||
|
self.assertEqual(counter.get('messages'), 1)
|
||||||
|
|
||||||
def test_multipart_sms_gsm_7bit(self):
|
def test_multipart_sms_gsm_7bit(self):
|
||||||
message = self.GSM_7BIT_CHAR_MAP + self.GSM_7BIT_EX_CHAR_MAP
|
message = self.GSM_7BIT_CHAR_MAP + self.GSM_7BIT_EX_CHAR_MAP
|
||||||
double_message = message[:self.GSM_7BIT_LEN_MULTIPART] * 2
|
double_message = message[:self.GSM_7BIT_LEN_MULTIPART] * 2
|
||||||
counter = SMSCounter.count(double_message)
|
counter = SMSCounter.count(double_message)
|
||||||
self.assertEqual(counter.get('encoding'), SMSCounter.GSM_7BIT_EX)
|
self.assertEqual(counter.get('encoding'), SMSCounter.GSM_7BIT_EX)
|
||||||
self.assertEqual(counter.get('messages'), 2, 'Message must contains from 2 parts')
|
self.assertEqual(counter.get('messages'), 2, 'Message must contain 2 parts')
|
||||||
|
|
||||||
def test_multipart_sms_utf(self):
|
def test_multipart_sms_utf(self):
|
||||||
utf_message = ['£', 'ф', '±'] + self.GSM_7BIT_CHAR_MAP
|
utf_message = ['£', 'ф', '±'] + self.GSM_7BIT_CHAR_MAP
|
||||||
double_utf_message = utf_message[:self.UTF16_LEN_MULTIPART] * 2
|
double_utf_message = utf_message[:self.UTF16_LEN_MULTIPART] * 2
|
||||||
counter = SMSCounter.count(double_utf_message)
|
counter = SMSCounter.count(double_utf_message)
|
||||||
self.assertEqual(counter.get('encoding'), SMSCounter.UTF16)
|
self.assertEqual(counter.get('encoding'), SMSCounter.UTF16)
|
||||||
self.assertEqual(counter.get('messages'), 2, 'Message must contains from 2 parts')
|
self.assertEqual(counter.get('messages'), 2, 'Message must contain 2 parts')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
import unittest
|
import unittest
|
||||||
|
Loading…
Reference in New Issue
Block a user