Stata 正则表达式：仅保留中文字符
* Purpose: build a cleaned text variable, text_strings, from variable B that
* contains only Chinese characters, with Chinese numerals stripped out.
* Assumes B is a string variable (ustrregexra() operates on strings) -- confirm
* against the dataset.

* Load the dataset, discarding whatever is currently in memory.
* A forward slash is used as the directory separator: Stata accepts it on every
* platform, whereas a backslash is Windows-only and doubles as Stata's
* macro-escape character.
use "0807a/xcv", clear

* Work on a copy so that the original variable B is left untouched.
gen text_strings = B

* Keep only Chinese characters: every run of characters outside the range
* U+4E00-U+9FA5 (Latin letters, digits, punctuation, whitespace, ...) is
* collapsed into a single space.
replace text_strings = ustrregexra(text_strings, "[^\u4e00-\u9fa5]+", " ")

* Remove Chinese numerals: every run of the characters 一 through 十 is
* replaced by a single space.
* NOTE: because both steps substitute a space, the result can carry leading,
* trailing, or consecutive spaces; no trimming is performed here.
replace text_strings = ustrregexra(text_strings, "[一二三四五六七八九十]+", " ")