{"templateId":"markdown","sharedDataIds":{"sidebar":"sidebar-@l10n/ja/sidebars.yaml"},"props":{"metadata":{"markdoc":{"tagList":[]},"type":"markdown"},"seo":{"title":"語彙チャンクとトークン化","projectTitle":"Tisane Developer Documentation","description":"Tisane is an NLP platform, used for content moderation & more.","llmstxt":{"hide":false,"sections":[{"title":"Table of contents","includeFiles":["**/*"],"excludeFiles":[]}],"excludeFiles":[]}},"dynamicMarkdocComponents":[],"compilationErrors":[],"ast":{"$$mdtype":"Tag","name":"article","attributes":{},"children":[{"$$mdtype":"Tag","name":"Heading","attributes":{"level":1,"id":"語彙チャンクとトークン化","__idx":0},"children":["語彙チャンクとトークン化"]},{"$$mdtype":"Tag","name":"p","attributes":{},"children":["Tisaneでは、語彙チャンクの表現に統一性を持たせ、論理的な形態素ベースの表現を採用しています。"]},{"$$mdtype":"Tag","name":"p","attributes":{},"children":["ドイツ語のような複合語を使う言語では、複合語は構成要素に切り分けられます。"]},{"$$mdtype":"Tag","name":"p","attributes":{},"children":["慣用的な",{"$$mdtype":"Tag","name":"MarkdownLink","attributes":{"href":"https://en.wikipedia.org/wiki/Multiword_expression"},"children":["複数単語表現"]},"（「kung fu」、「power plant」、「clay pigeon」）は1つの語彙とみなされます。"]},{"$$mdtype":"Tag","name":"Heading","attributes":{"level":3,"id":"例","__idx":1},"children":["例："]},{"$$mdtype":"Tag","name":"ul","attributes":{},"children":[{"$$mdtype":"Tag","name":"li","attributes":{},"children":["英語：\"I don't see the power plant.\"=> [\"I\", \"do\", \"n't\", \"see\", \"the\", \"power plant\", \".\"]"]},{"$$mdtype":"Tag","name":"li","attributes":{},"children":["ドイツ語：\"Jetzt sollen die Stahlkugeln ersetzt werden.\"=> [\"Jetzt\", \"sollen\", \"die\", \"Stahl\", \"kugeln\", \"ersetzt\", \"werden\", \".\"]"]},{"$$mdtype":"Tag","name":"li","attributes":{},"children":["中国語簡体字：\"我给了老张三本书\" => [\"我\", \"给了\", \"老张\", \"三\", \"本\", \"书\"]（空白を使わない言語では、助詞は修飾する単語と一緒になることが多くあります。）"]},{"$$mdtype":"Tag","name":"li","attributes":{},"children":["スペイン語：\"Asimismo, San Francisco es una de las mejores ciudades de EE.UU.\"=> [\"Asimismo\", \",\", \"San Francisco\", \"es\", \"una\", \"de\", \"las\", \"mejores\", \"ciudades\", \"de\", \"EE.UU.\"]"]}]},{"$$mdtype":"Tag","name":"Heading","attributes":{"level":2,"id":"使い方","__idx":2},"children":["使い方"]},{"$$mdtype":"Tag","name":"p","attributes":{},"children":["トークン化／語彙チャンクにTisaneを利用するには："]},{"$$mdtype":"Tag","name":"ol","attributes":{},"children":[{"$$mdtype":"Tag","name":"li","attributes":{},"children":[{"$$mdtype":"Tag","name":"code","attributes":{},"children":["\"words\":true"]},"を",{"$$mdtype":"Tag","name":"code","attributes":{},"children":["settings"]},"で指定する。"]},{"$$mdtype":"Tag","name":"li","attributes":{},"children":["レスポンスでは、",{"$$mdtype":"Tag","name":"code","attributes":{},"children":["sentence_list"]},"セクションのすべての要素（個々の文章）を横断する。"]},{"$$mdtype":"Tag","name":"li","attributes":{},"children":["語彙チャンクは ",{"$$mdtype":"Tag","name":"code","attributes":{},"children":["words"]},"の下にあります。"]}]}]},"headings":[{"value":"語彙チャンクとトークン化","id":"語彙チャンクとトークン化","depth":1},{"value":"例：","id":"例","depth":3},{"value":"使い方","id":"使い方","depth":2}],"frontmatter":{"seo":{"title":"語彙チャンクとトークン化"}},"lastModified":"2025-06-30T05:16:22.000Z","pagePropGetterError":{"message":"","name":""}},"slug":"/ja/guides/how-tos/lexicalchunking","userData":{"isAuthenticated":false,"teams":["anonymous"]},"isPublic":true}