python

[numpy] numpy 범용 함수 - 문자열

전감자(◔◡◔) 2022. 9. 29. 01:27
'''
  문자열 관련 범용 함수
  1. 문법
     np.char.함수명
'''
import numpy as np
# Enumerate every attribute name exposed by the np.char namespace.
_char_names = dir(np.char)
print(_char_names)

'''
['__all__', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', 
'__name__', '__package__', '__spec__', '_binary_op_dispatcher', '_center_dispatcher', 
'_clean_args', '_code_dispatcher', '_count_dispatcher', '_endswith_dispatcher', 
'_expandtabs_dispatcher', '_get_num_chars', '_globalvar', '_join_dispatcher', 
'_just_dispatcher', '_mod_dispatcher', '_multiply_dispatcher', '_partition_dispatcher', 
'_replace_dispatcher', '_split_dispatcher', '_splitlines_dispatcher', 
'_startswith_dispatcher', '_strip_dispatcher', '_to_string_or_unicode_array', 
'_translate_dispatcher', '_unary_op_dispatcher', '_use_unicode', '_vec_string', 
'_zfill_dispatcher', 
'add', 'array', 'array_function_dispatch', 'asarray', 'asbytes', 
'bool_', 'capitalize', 'center', 'character', 'chararray', 'compare_chararrays', 
'count', 'decode', 'encode', 'endswith', 'equal', 'expandtabs', 'find', 'functools', 
'greater', 'greater_equal', 'index', 'int_', 'integer', 'isalnum', 'isalpha', 'isdecimal', 
'isdigit', 'islower', 'isnumeric', 'isspace', 'istitle', 'isupper', 'join', 'less', 'less_equal', 
'ljust', 'lower', 'lstrip', 'mod', 'multiply', 'narray', 'ndarray', 'not_equal', 'numpy', 
'object_', 'overrides', 'partition', 'replace', 'rfind', 'rindex', 'rjust', 'rpartition', 
'rsplit', 'rstrip', 'set_module', 'split', 'splitlines', 'startswith', 'str_len', 'string_', 
'strip', 'swapcase', 'title', 'translate', 'unicode_', 'upper', 'zfill']
'''
# np.str was only an alias for the built-in str type. The alias was deprecated
# in NumPy 1.20 and removed in NumPy 1.24, where accessing it raises
# AttributeError, so inspect the built-in type directly (same output).
print(dir(str))
'''
['__add__', '__class__', '__contains__', '__delattr__', '__dir__', '__doc__', '__eq__', 
'__format__', '__ge__', '__getattribute__', '__getitem__', '__getnewargs__', '__gt__', 
'__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__len__', '__lt__', 
'__mod__', '__mul__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', 
'__rmod__', '__rmul__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', 
'capitalize', 'casefold', 'center', 'count', 'encode', 'endswith', 'expandtabs', 'find', 
'format', 'format_map', 'index', 'isalnum', 'isalpha', 'isascii', 'isdecimal', 'isdigit', 
'isidentifier', 'islower', 'isnumeric', 'isprintable', 'isspace', 'istitle', 'isupper', 'join', 
'ljust', 'lower', 'lstrip', 'maketrans', 'partition', 'replace', 'rfind', 'rindex', 'rjust', 
'rpartition', 'rsplit', 'rstrip', 'split', 'splitlines', 'startswith', 'strip', 'swapcase', 
'title', 'translate', 'upper', 'zfill']
'''
'''
  문자열 관련 범용 함수
  1. 문법
     np.char.함수명
'''
'''
['__all__', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', 
'__name__', '__package__', '__spec__', '_binary_op_dispatcher', '_center_dispatcher', 
'_clean_args', '_code_dispatcher', '_count_dispatcher', '_endswith_dispatcher', 
'_expandtabs_dispatcher', '_get_num_chars', '_globalvar', '_join_dispatcher', 
'_just_dispatcher', '_mod_dispatcher', '_multiply_dispatcher', '_partition_dispatcher', 
'_replace_dispatcher', '_split_dispatcher', '_splitlines_dispatcher', 
'_startswith_dispatcher', '_strip_dispatcher', '_to_string_or_unicode_array', 
'_translate_dispatcher', '_unary_op_dispatcher', '_use_unicode', '_vec_string', 
'_zfill_dispatcher', 
'add', 'array', 'array_function_dispatch', 'asarray', 'asbytes', 
'bool_', 'capitalize', 'center', 'character', 'chararray', 'compare_chararrays', 
'count', 'decode', 'encode', 'endswith', 'equal', 'expandtabs', 'find', 'functools', 
'greater', 'greater_equal', 'index', 'int_', 'integer', 'isalnum', 'isalpha', 'isdecimal', 
'isdigit', 'islower', 'isnumeric', 'isspace', 'istitle', 'isupper', 'join', 'less', 'less_equal', 
'ljust', 'lower', 'lstrip', 'mod', 'multiply', 'narray', 'ndarray', 'not_equal', 'numpy', 
'object_', 'overrides', 'partition', 'replace', 'rfind', 'rindex', 'rjust', 'rpartition', 
'rsplit', 'rstrip', 'set_module', 'split', 'splitlines', 'startswith', 'str_len', 'string_', 
'strip', 'swapcase', 'title', 'translate', 'unicode_', 'upper', 'zfill']
'''
import numpy as np

# 1. np.char.add(): element-wise concatenation of two string arrays.
x = np.array(["AAA", "BBB"])
x2 = np.array(["AAA2", "BBB2"])
concatenated = np.char.add(x, x2)
print("1. np.char.add()", concatenated)  # ['AAAAAA2' 'BBBBBB2']


# 2. np.char.multiply(): repeat every element the given number of times.
x = np.array(["AAA", "BBB"])
repeated = np.char.multiply(x, 2)
print("2. np.char.multiply() ", repeated)  # ['AAAAAA' 'BBBBBB']

# 3. Case conversion: every function below is applied element-wise to the
#    same input array and returns a new array (the input is left untouched).
x = np.array(["Hello world", "Say Good bye", "who Are You"])

case_demos = (
    # capitalize: first character upper-case, the rest lower-case
    #   -> ['Hello world' 'Say good bye' 'Who are you']
    ("3.np.char.capitalize()", np.char.capitalize),
    # upper: all upper-case -> ['HELLO WORLD' 'SAY GOOD BYE' 'WHO ARE YOU']
    ("3.np.char.upper()", np.char.upper),
    # lower: all lower-case -> ['hello world' 'say good bye' 'who are you']
    ("3.np.char.lower()", np.char.lower),
    # swapcase: invert the case of every character
    #   -> ['hELLO WORLD' 'sAY gOOD BYE' 'WHO aRE yOU']
    ("3.np.char.swapcase()", np.char.swapcase),
    # title: first letter of each word upper-case
    #   -> ['Hello World' 'Say Good Bye' 'Who Are You']
    ("3.np.char.title() ", np.char.title),
)
for label, convert in case_demos:
    print(label, convert(x))

# Padding: np.char.ljust() / np.char.rjust() / np.char.center() pad every
# element to a fixed width with the given fill character.
x = np.array(["Hello world", "Say Good bye", "who Are You"])
width = 20
fill = "_"

# ['Hello world_________' 'Say Good bye________' 'who Are You_________']
print("3.np.char.ljust() ", np.char.ljust(x, width, fillchar=fill))
# ['_________Hello world' '________Say Good bye' '_________who Are You']
print("3.np.char.rjust() ", np.char.rjust(x, width, fillchar=fill))
# ['____Hello world_____' '____Say Good bye____' '____who Are You_____']
print("3.np.char.center() ", np.char.center(x, width, fillchar=fill))

# Stripping: np.char.lstrip() / np.char.rstrip() / np.char.strip().
strip_demos = (
    ("3.np.char.lstrip() ", np.char.lstrip),
    ("3.np.char.rstrip() ", np.char.rstrip),
    ("3.np.char.strip() ", np.char.strip),
)

# Without a second argument, surrounding whitespace is removed:
#   lstrip -> ['Hello world     ' 'Say Good bye    ' 'who Are You    ']
#   rstrip -> ['     Hello world' '     Say Good bye' '    who Are You']
#   strip  -> ['Hello world' 'Say Good bye' 'who Are You']
x = np.array(["     Hello world     ", "     Say Good bye    ", "    who Are You    "])
for label, strip_func in strip_demos:
    print(label, strip_func(x))

# With a character argument, that character is removed instead:
#   lstrip -> ['elloHH' 'elloHH' 'elloHH']
#   rstrip -> ['HHello' 'HHello' 'HHello']
#   strip  -> ['ello' 'ello' 'ello']
x = np.array(["HHelloHH", "HHelloHH", "HHelloHH"])
for label, strip_func in strip_demos:
    print(label, strip_func(x, "H"))

# np.char.split(): split every element into a list of tokens.
# With no separator given, elements are split on whitespace.
x = np.array(["Hello world", "Say Good bye", "who Are You"])
whitespace_tokens = np.char.split(x)
# [list(['Hello', 'world']) list(['Say', 'Good', 'bye']) list(['who', 'Are', 'You'])]
print("3.np.char.split() ", whitespace_tokens)

# An explicit separator can be passed as the second argument.
x = np.array(["Hello/world", "Say/Good/bye", "who/Are/You"])
slash_tokens = np.char.split(x, "/")
# [list(['Hello', 'world']) list(['Say', 'Good', 'bye']) list(['who', 'Are', 'You'])]
print("3.np.char.split() ", slash_tokens)

# replace / find / count, all applied element-wise to the same array.
x = np.array(["Hello/world", "Say/Good/bye", "who/Are/You"])
separator = "/"

# np.char.replace(): substitute every occurrence of a substring.
#   -> ['Hello***world' 'Say***Good***bye' 'who***Are***You']
print("3.np.char.replace() ", np.char.replace(x, separator, "***"))

# np.char.find(): index of the first occurrence of a substring -> [5 3 3]
print("3.np.char.find() ", np.char.find(x, separator))

# np.char.count(): number of occurrences of a substring -> [1 2 2]
print("3.np.char.count() ", np.char.count(x, separator))

# np.char.join(): insert a separator between the characters of every element.
x = np.array(["Hello/world", "Say/Good/bye", "who/Are/You"])

# For comparison, the built-in str.join works on a sequence of strings.
# Passing integers (e.g. [1, 2, 3]) raises TypeError.
digits = ["1", "2", "3"]
print(",".join(digits))

# ['H,e,l,l,o,/,w,o,r,l,d' 'S,a,y,/,G,o,o,d,/,b,y,e' 'w,h,o,/,A,r,e,/,Y,o,u']
print("3.np.char.join() ", np.char.join(",", x))

# Integers converted through dtype=np.str_ become one-character strings, so
# there is nothing to separate and the elements come back unchanged:
# ['1' '2' '3']
x2 = np.array([1, 2, 3], dtype=np.str_)
print("3.np.char.join() ", np.char.join(",", x2))

# Predicates: each function returns a boolean array, one flag per element.
x = np.array(["Hello", "20", "56", "5Helo", "HELLO", "hello"])

predicates = (
    np.char.isdigit,  # [False  True  True False False False]
    np.char.isalpha,  # [ True False False False  True  True]
    np.char.isupper,  # [False False False False  True False]
    np.char.islower,  # [False False False False False  True]
)
for predicate in predicates:
    print(predicate(x))


x = np.array(["Hello world", "Say Good bye", "who Are You"])
# np.str was only an alias for the built-in str type; it was deprecated in
# NumPy 1.20 and removed in NumPy 1.24, so np.str.replace(...) now raises
# AttributeError. Use the built-in str method for a single string instead.
# The built-in method does not accept an array such as x; for element-wise
# replacement use np.char.replace(x, "e", "EEE").
print("Hello".replace("e", "EEE"))  # HEEEllo