Extract only valid words

Apr 14, 2021 05:25


# Inclusive bounds of the accepted code point range, U+0900..U+094F (this
# range lies inside the Unicode Devanagari block).
_FIRST_ACCEPTED_CODE_POINT = 0x0900
_LAST_ACCEPTED_CODE_POINT = 0x094F

# Code points inside the accepted range that are nevertheless rejected.
# NOTE(review): the rationale for this exact list is not recorded in the
# code; confirm before changing it.
_REJECTED_CODE_POINTS = frozenset({
    0x0900, 0x0904, 0x090E, 0x0912, 0x0929, 0x0934,
    0x093A, 0x093B, 0x0946, 0x094A, 0x094E, 0x094F,
})


def isvalid(myword):
    """Return ``myword`` if all of its characters are accepted, else ``None``.

    A character is accepted when its code point lies in U+0900..U+094F and
    is not listed in ``_REJECTED_CODE_POINTS``.  An empty ``myword`` has no
    character to reject and is returned unchanged (note that it is falsy).

    Args:
        myword: The word to check, as a ``str``.  An iterable of strings is
            also accepted; every character of every string is checked.

    Returns:
        ``myword`` itself when every character is accepted, otherwise
        ``None``.
    """
    # For a ``str`` argument ``part`` is a single character and the inner
    # loop visits just that character; the nesting is kept so an iterable of
    # strings keeps working as well.
    for part in myword:
        for char in part:
            code_point = ord(char)
            # Compare code points directly instead of slicing the text of a
            # "\uXXXX" escape sequence.
            if not (_FIRST_ACCEPTED_CODE_POINT
                    <= code_point
                    <= _LAST_ACCEPTED_CODE_POINT):
                return None
            if code_point in _REJECTED_CODE_POINTS:
                return None
    return myword
# Retained from the original script; this block no longer rebinds sys.stdout.
import sys

# Read the corpus, one candidate word per line, with surrounding whitespace
# removed.  UTF-8 is requested explicitly so decoding does not depend on the
# platform's default encoding.
# NOTE(review): assumes the corpus file is UTF-8 encoded - confirm.
with open("my_corpus2.txt", encoding="utf-8") as f:
    guj = [line.strip() for line in f]

# Write each accepted word on its own line.  The output file is written
# through its own handle and closed by the ``with`` block, instead of
# replacing sys.stdout with a file that is never closed.
with open("valid_word8.txt", "w", encoding="utf-8") as out:
    for m in guj:
        # The validator is named ``isvalid``; the previous ``isinvalid``
        # call raised NameError.  It returns the word itself when accepted,
        # so one call per word is enough.
        if isvalid(m):
            print(m, file=out)

unicode, devanagari

Previous post Next post
Up