{"id":981,"date":"2014-08-06T15:34:00","date_gmt":"2014-08-06T15:34:00","guid":{"rendered":"https:\/\/rawbytes.org\/?p=981"},"modified":"2025-11-24T02:59:24","modified_gmt":"2025-11-24T02:59:24","slug":"python%e3%81%a7nkf%e3%82%92%e4%bd%bf%e3%81%a3%e3%81%a6%e6%96%87%e5%ad%97%e3%82%b3%e3%83%bc%e3%83%89%e3%81%ae%e5%a4%89%e6%8f%9b%e3%82%92%e8%a1%8c%e3%81%86%e8%a6%81%e7%82%b9%e3%81%a0%e3%81%91","status":"publish","type":"post","link":"https:\/\/rawbytes.org\/?p=981","title":{"rendered":"python\u3067nkf\u3092\u4f7f\u3063\u3066\u6587\u5b57\u30b3\u30fc\u30c9\u306e\u5909\u63db\u3092\u884c\u3046\u8981\u70b9\u3060\u3051"},"content":{"rendered":"<p>\u74b0\u5883\u4f9d\u5b58\u6587\u5b57\u3092\u542b\u3080utf-8\u6587\u5b57\u5217\u3092euc_jp\u306b\u5909\u63db\u3057\u305f\u3044\u3068\u304d\u306b\u3001python\u306eunicode.encode(&#8216;euc_jp&#8217;)\u3060\u3068\u30a8\u30e9\u30fc\u306b\u306a\u3063\u3066\u3057\u307e\u3046\u306e\u3067\u3053\u308c\u3092nkf\u30e2\u30b8\u30e5\u30fc\u30eb\u3067\u89e3\u6c7a\u3059\u308b\u3068\u3044\u3046\u5185\u5bb9\u3002<\/p>\n<p><!--more--><br \/>\n\u30a8\u30e9\u30fc\u306f\u5177\u4f53\u7684\u306b\u306f\u4ee5\u4e0b\u306e\u3088\u3046\u306a\u611f\u3058<\/p>\n<blockquote class=\"tr_bq\"><p>\n&gt;&gt;&gt; unicode_str = unicode(u&#8217;\u2460\u2461\u2462&#8217;)<br \/>\n&gt;&gt;&gt; print unicode_str<br \/>\n\u2460\u2461\u2462<br \/>\n&gt;&gt;&gt; unicode_str.encode(&#8216;euc-jp&#8217;)<br \/>\nTraceback (most recent call last):<br \/>\n&nbsp; File &#8220;&lt;stdin&gt;&#8221;, line 1, in &lt;module&gt;<br \/>\nUnicodeEncodeError: &#8216;euc_jp&#8217; codec can&#8217;t encode character u&#8217;u2460&#8242; in position 0: illegal multibyte sequence&nbsp;<\/p><\/blockquote>\n<p>\u74b0\u5883\u4f9d\u5b58\u6587\u5b57\u306e\u5909\u63db\u306b\u306fpython\u306enkf\u30e2\u30b8\u30e5\u30fc\u30eb\u3092\u4f7f\u3046\u3068\u826f\u3044\u3068\u805e\u3044\u305f\u306e\u3067\u8a66\u3057\u3066\u307f\u305f\u3002\u307e\u305a\u306f\u5fc5\u8981\u306ankf\u30b3\u30de\u30f3\u30c9\u3068python\u7528\u306e\u30e2\u30b8\u30e5\u30fc\u30eb\u306e\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3002\u3061\u306a\u307f\u306bOSX Mavericks\u3068Python 2.7.7\u306e\u74b0\u5883\u3067\u8a66\u3057\u305f\u3002<\/p>\n<blockquote class=\"tr_bq\"><p>\n$ brew install nkf<br \/>\n$ pip install nkf<\/p><\/blockquote>\n<p>\u3010\u8ffd\u8a18\u3011 \u6df1\u304f\u8003\u3048\u305a\u306bnkf\u30b3\u30de\u30f3\u30c9\u3092Homebrew\u304b\u3089\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3057\u305f\u3051\u3069\u3053\u308c\u306f\u4e0d\u8981\u304b\u3082\u3057\u308c\u306a\u3044\u3002<\/p>\n<p>Python\u3092\u8d77\u52d5\u3057\u3066utf\u306e\u6587\u5b57\u5217\u3092\u8aad\u307f\u8fbc\u3080\u3002\u30d5\u30a1\u30a4\u30eb\u306b\u306f\u300c\u2460\u2461\u2462\u300d\u3068\u3044\u3046\u6587\u5b57\u5217\u304c\u5165\u3063\u3066\u3044\u308b\u3002\u4eca\u56de\u306fmi\u3067\u30d5\u30a1\u30a4\u30eb\u3092\u958b\u3044\u3066\u78ba\u8a8d\u3059\u308b\u3002<\/p>\n<div class=\"separator\" style=\"clear: both;text-align: center\">\n<a href=\"https:\/\/rawbytes.org\/wp-content\/uploads\/2014\/08\/utf8.png\" style=\"margin-left: 1em;margin-right: 1em\"><img decoding=\"async\" border=\"0\" src=\"https:\/\/rawbytes.org\/wp-content\/uploads\/2014\/08\/utf8.png\" \/><\/a><\/div>\n<p>\n\u30d5\u30a1\u30a4\u30eb\u304b\u3089\u8aad\u307f\u8fbc\u3093\u3060\u6642\u70b9\u3067\u306fstr\u578b\u3002<\/p>\n<blockquote class=\"tr_bq\"><p>\n$ python<br \/>\nPython 2.7.7 (default, Jun 14 2014, 23:12:13)<br \/>\n[GCC 4.2.1 Compatible Apple LLVM 5.1 (clang-503.0.40)] on darwin<br \/>\nType &#8220;help&#8221;, &#8220;copyright&#8221;, &#8220;credits&#8221; or &#8220;license&#8221; for more information.<br \/>\n&gt;&gt;&gt; import nkf<br \/>\n&gt;&gt;&gt; f = open(&#8216;utf_orig.txt&#8217;,&#8217;r&#8217;)<br \/>\n&gt;&gt;&gt; str_utf = f.read()<br \/>\n&gt;&gt;&gt; f.close()<br \/>\n&gt;&gt;&gt; print str_utf<br \/>\n\u2460\u2461\u2462<br \/>\n&gt;&gt;&gt; type(str_utf)<br \/>\n&lt;type &#8216;str&#8217;&gt;<\/p><\/blockquote>\n<p>\u6bd4\u8f03\u7528\u306bunicode\u30aa\u30d6\u30b8\u30a7\u30af\u30c8\u3082\u4f5c\u6210\u3057\u3066\u304a\u304f\u3002\u4ee5\u4e0b\u306e\u3088\u3046\u306a\u611f\u3058<\/p>\n<blockquote class=\"tr_bq\"><p>\n&gt;&gt;&gt; str_uni = unicode(str_utf, &#8216;utf-8&#8217;)<br \/>\n&gt;&gt;&gt; type(str_uni)<br \/>\n&lt;type &#8216;unicode&#8217;&gt;&nbsp;<\/p><\/blockquote>\n<p>nkf\u306b\u306fstring\u30aa\u30d6\u30b8\u30a7\u30af\u30c8\u3092\u6e21\u3059\u3088\u3046\u306a\u306e\u3067\u3001\u4e0a\u8a18\u306e\u4f8b\u3067\u306fstr_utf\uff08\u30d5\u30a1\u30a4\u30eb\u304b\u3089\u8aad\u307f\u8fbc\u3093\u3060\u305d\u306e\u307e\u307e\uff09\u3092\u4f7f\u3046\u3002\u7b2c\u4e00\u5f15\u6570\u306b\u306fnkf\u30b3\u30de\u30f3\u30c9\u306e\u30aa\u30d7\u30b7\u30e7\u30f3\u3001\u7b2c\u4e8c\u5f15\u6570\u306b\u5909\u63db\u3057\u305f\u3044str\u578b\u306e\u30c7\u30fc\u30bf\u3092\u6e21\u3059\u3002\u4eca\u56de\u306feuc_jp\u306b\u5909\u63db\u3057\u305f\u3044\u306e\u3067\u4ee5\u4e0b\u306e\u3088\u3046\u306b\u3059\u308b\u3002<\/p>\n<blockquote class=\"tr_bq\"><p>\n&gt;&gt;&gt; str_euc = nkf.nkf(&#8216;e&#8217;, str_utf)<\/p><\/blockquote>\n<p>\u30d5\u30a1\u30a4\u30eb\u306b\u66f8\u304d\u51fa\u3057\u3066\u307f\u308b<\/p>\n<blockquote class=\"tr_bq\"><p>\n&gt;&gt;&gt; fout = open(&#8216;euc_converted.txt&#8217;, &#8216;w&#8217;)<br \/>\n&gt;&gt;&gt; fout.write(str_euc)<br \/>\n&gt;&gt;&gt; fout.close()&nbsp;<\/p><\/blockquote>\n<p>\u5909\u66f4\u5f8c\u306e\u66f8\u304d\u51fa\u3057\u305f\u30d5\u30a1\u30a4\u30eb\u3092\u518d\u3073mi\u3067\u958b\u3044\u3066\u307f\u308b\u3068\u3001\u4eca\u5ea6\u306feuc_jp\u3067\u74b0\u5883\u4f9d\u5b58\u6587\u5b57\u304c\u51fa\u529b\u3055\u308c\u3066\u3044\u308b\u3053\u3068\u304c\u5206\u304b\u308b\u3002<\/p>\n<div class=\"separator\" style=\"clear: both;text-align: center\">\n<a href=\"https:\/\/rawbytes.org\/wp-content\/uploads\/2014\/08\/euc.png\" style=\"margin-left: 1em;margin-right: 1em\"><img decoding=\"async\" border=\"0\" src=\"https:\/\/rawbytes.org\/wp-content\/uploads\/2014\/08\/euc.png\" \/><\/a><\/div>\n<div class=\"separator\" style=\"clear: both;text-align: center\">\n<\/div>\n<div class=\"separator\" style=\"clear: both;text-align: left\">\n\u3067\u306f\u3001unicode\u30aa\u30d6\u30b8\u30a7\u30af\u30c8\u304c\u3042\u308b\u5834\u5408\u306b\u306f\u3069\u3046\u3059\u308b\u304b\uff1funicode\u30aa\u30d6\u30b8\u30a7\u30af\u30c8\u3092\u3044\u3061\u3069\u4efb\u610f\u306e\u6587\u5b57\u30b3\u30fc\u30c9\u3067encode\u3057\u3066str\u30aa\u30d6\u30b8\u30a7\u30af\u30c8\u306b\u3044\u3063\u305f\u3093\u623b\u3057\u3066\u3001\u305d\u306e\u4e0a\u3067nkf\u30e1\u30bd\u30c3\u30c9\u3092\u901a\u305b\u3070\u826f\u3044\u3002<\/div>\n<blockquote class=\"tr_bq\"><p>\n&gt;&gt;&gt; str = str_uni.encode(&#8216;utf8&#8217;)<br \/>\n&gt;&gt;&gt; str_euc = nkf.nkf(&#8216;e&#8217;, str)<\/p><\/blockquote>\n<div class=\"separator\" style=\"clear: both;text-align: left\">\n<\/div>\n","protected":false},"excerpt":{"rendered":"<p>\u74b0\u5883\u4f9d\u5b58\u6587\u5b57\u3092\u542b\u3080utf-8\u6587\u5b57\u5217\u3092euc_jp\u306b\u5909\u63db\u3057\u305f\u3044\u3068\u304d\u306b\u3001python\u306eunicode.encode(&#8216;euc_jp&#8217;)\u3060\u3068\u30a8\u30e9\u30fc\u306b\u306a\u3063\u3066\u3057\u307e\u3046\u306e\u3067\u3053\u308c\u3092nkf\u30e2\u30b8\u30e5\u30fc\u30eb\u3067\u89e3\u6c7a\u3059\u308b\u3068\u3044\u3046\u5185\u5bb9\u3002<\/p>\n","protected":false},"author":1,"featured_media":982,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[11,21,32],"tags":[],"class_list":["post-981","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-develop","category-howto","category-python"],"_links":{"self":[{"href":"https:\/\/rawbytes.org\/index.php?rest_route=\/wp\/v2\/posts\/981","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/rawbytes.org\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/rawbytes.org\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/rawbytes.org\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/rawbytes.org\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=981"}],"version-history":[{"count":1,"href":"https:\/\/rawbytes.org\/index.php?rest_route=\/wp\/v2\/posts\/981\/revisions"}],"predecessor-version":[{"id":984,"href":"https:\/\/rawbytes.org\/index.php?rest_route=\/wp\/v2\/posts\/981\/revisions\/984"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/rawbytes.org\/index.php?rest_route=\/wp\/v2\/media\/982"}],"wp:attachment":[{"href":"https:\/\/rawbytes.org\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=981"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/rawbytes.org\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=981"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/rawbytes.org\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=981"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}