# Apr 14, 2021 05:25
# Code points inside U+0900..U+094F that isvalid() rejects.
_EXCLUDED_CODE_POINTS = frozenset(
    (
        0x0900, 0x0904, 0x090E, 0x0912, 0x0929, 0x0934,
        0x093A, 0x093B, 0x0946, 0x094A, 0x094E, 0x094F,
    )
)


def isvalid(myword):
    """Return *myword* if all of its characters are accepted, else ``None``.

    A character is accepted when its code point lies in U+0900..U+094F
    and is not one of ``_EXCLUDED_CODE_POINTS``.

    Args:
        myword: A string, or an iterable of strings; every character of
            every element is checked.

    Returns:
        ``myword`` itself when every character is accepted (an empty
        input is returned unchanged, which is falsy), otherwise ``None``.
    """
    # Two levels of iteration so that both a plain string and an iterable
    # of strings are handled: iterating a one-character string yields that
    # same character again.
    for part in myword:
        for ch in part:
            code = ord(ch)
            if not 0x0900 <= code <= 0x094F or code in _EXCLUDED_CODE_POINTS:
                return None
    return myword
import sys  # retained from the original script; code outside this section may use it

# Read the corpus, one candidate word per line.  The encoding is given
# explicitly so that non-ASCII text is decoded the same way on every
# platform.  NOTE(review): assumes the corpus is UTF-8 - confirm.
with open("my_corpus2.txt", encoding="utf-8") as f:
    content = f.readlines()
guj = [x.strip() for x in content]

# Write every word accepted by isvalid() to the output file, one per line.
# The file is written to directly (rather than by rebinding sys.stdout) so
# it is flushed and closed when the block exits.
with open("valid_word8.txt", "w", encoding="utf-8") as out:
    for m in guj:
        checked = isvalid(m)
        # isvalid() returns None for rejected words and "" for blank lines;
        # both are falsy and are skipped.
        if checked:
            print(checked, file=out)
# Tags: unicode, devanagari