Merge pull request #5 from zlove/master

Fixing counting of GSM 7-bit extended characters
This commit is contained in:
Dayo Ayeni 2021-05-18 23:37:48 -06:00 committed by GitHub
commit b264ab16c9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 36 additions and 20 deletions

View File

@ -25,6 +25,12 @@ from sms_counter import SMSCounter
>>> {'length': 29, 'messages': 1, 'remaining': 41, 'per_message': 70, 'encoding': 'UTF16'}
```
The meaning of the `length`, `remaining` and `per_message` values returned by `SMSCounter.count()` depends on the encoding.
For GSM_7BIT_EX encoding, `length`, `remaining` and `per_message` count the number of 7-bit characters in the message, __including__ the escape character that must precede each character from the "extended" character set. For example, the `length` of the message '€' is 2, because it takes two 7-bit characters (the escape character followed by the code for '€') to encode '€' in GSM_7BIT_EX.
For the UTF16 and GSM_7BIT encodings, `length`, `remaining` and `per_message` count the number of characters (since, within each of these encodings, all characters have an equal bit width).
#### Mentions
* Original idea : [danxexe/sms-counter](https://github.com/danxexe/sms-counter)

View File

@ -18,20 +18,20 @@ class SMSCounter(object):
@classmethod
def _get_gsm_7bit_map(cls):
gsm_7bit_map = [
10, 13, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46,
47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 92, 95, 97, 98, 99, 100,
101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113,
114, 115, 116, 117, 118, 119, 120, 121, 122, 161, 163, 164, 165,
191, 196, 197, 198, 199, 201, 209, 214, 216, 220, 223, 224, 228,
229, 230, 232, 233, 236, 241, 242, 246, 248, 249, 252, 915, 916,
920, 923, 926, 928, 931, 934, 936, 937]
10, 12, 13, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 92, 95, 97, 98, 99,
100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 161, 163, 164,
165, 191, 196, 197, 198, 199, 201, 209, 214, 216, 220, 223, 224,
228, 229, 230, 232, 233, 236, 241, 242, 246, 248, 249, 252, 915,
916, 920, 923, 926, 928, 931, 934, 936, 937]
return gsm_7bit_map
@classmethod
def _get_added_gsm_7bit_ex_map(cls):
added_gsm_7bit_ex_map = [91, 92, 93, 94, 123, 124, 125, 126, 8364]
added_gsm_7bit_ex_map = [12, 91, 92, 93, 94, 123, 124, 125, 126, 8364]
return added_gsm_7bit_ex_map
@classmethod
@ -59,11 +59,11 @@ class SMSCounter(object):
def count(cls, plaintext):
textlist = cls._text_to_unicode_pointcode_list(plaintext)
exchars = []
encoding = cls._detect_encoding(plaintext)
length = len(textlist)
if encoding == cls.GSM_7BIT_EX:
exchars = [c for c in textlist if c in cls._get_added_gsm_7bit_ex_map()]
lengthexchars = len(exchars)
length += lengthexchars

View File

@ -15,43 +15,53 @@ class SMSCounterTestCase(TestCase):
def _check_gsm_7bit_sms_length(self, counter, message, message_max_length=GSM_7BIT_LEN):
length_error = 'Message length should be less than {} symbols'.format(message_max_length)
self.assertLessEqual(len(message), counter.get('per_message'), length_error)
self.assertEqual(len(message), counter.get('length'))
textlist = SMSCounter._text_to_unicode_pointcode_list(message)
exchars = [c for c in textlist if c in SMSCounter._get_added_gsm_7bit_ex_map()]
self.assertEqual(len(message) + len(exchars), counter.get('length'))
self.assertEqual(counter.get('messages'), 1, 'Message is not multipart')
def test_gsm_7bit_chars(self):
message = self.GSM_7BIT_CHAR_MAP[:self.GSM_7BIT_LEN]
message = 'This is a message with only GSM 7-bit characters'
counter = SMSCounter.count(message)
self._check_gsm_7bit_sms_length(counter, message)
self.assertEqual(counter.get('encoding'), SMSCounter.GSM_7BIT)
def test_gsm_7bit_ex_chars(self):
message = self.GSM_7BIT_CHAR_MAP + self.GSM_7BIT_EX_CHAR_MAP
message = message[:self.GSM_7BIT_LEN]
message = 'This message has the GSM 7-bit extension characters appended ' + ''.join(self.GSM_7BIT_EX_CHAR_MAP)
counter = SMSCounter.count(message)
self._check_gsm_7bit_sms_length(counter, message)
self.assertEqual(counter.get('encoding'), SMSCounter.GSM_7BIT_EX)
def test_gsm_7bit_ex_chars_count_double(self):
message = self.GSM_7BIT_EX_CHAR_MAP[:5]
counter = SMSCounter.count(message)
self.assertEqual(counter.get('per_message'), self.GSM_7BIT_LEN)
self.assertEqual(counter.get('length'), 10)
self.assertEqual(counter.get('messages'), 1)
def test_utf_chars(self):
utf_message = ['£', 'ф', '±'] + self.GSM_7BIT_CHAR_MAP
utf_message = utf_message[:self.UTF16_LEN]
utf_message = utf_message[:self.UTF16_LEN_MULTIPART]
counter = SMSCounter.count(utf_message)
self._check_gsm_7bit_sms_length(counter, utf_message, self.UTF16_LEN)
self.assertEqual(counter.get('encoding'), SMSCounter.UTF16)
self.assertEqual(counter.get('length'), len(utf_message))
self.assertEqual(counter.get('messages'), 1)
def test_multipart_sms_gsm_7bit(self):
message = self.GSM_7BIT_CHAR_MAP + self.GSM_7BIT_EX_CHAR_MAP
double_message = message[:self.GSM_7BIT_LEN_MULTIPART] * 2
counter = SMSCounter.count(double_message)
self.assertEqual(counter.get('encoding'), SMSCounter.GSM_7BIT_EX)
self.assertEqual(counter.get('messages'), 2, 'Message must contains from 2 parts')
self.assertEqual(counter.get('messages'), 2, 'Message must contain 2 parts')
def test_multipart_sms_utf(self):
utf_message = ['£', 'ф', '±'] + self.GSM_7BIT_CHAR_MAP
double_utf_message = utf_message[:self.UTF16_LEN_MULTIPART] * 2
counter = SMSCounter.count(double_utf_message)
self.assertEqual(counter.get('encoding'), SMSCounter.UTF16)
self.assertEqual(counter.get('messages'), 2, 'Message must contains from 2 parts')
self.assertEqual(counter.get('messages'), 2, 'Message must contain 2 parts')
if __name__ == '__main__':
import unittest