# -*- coding: utf-8; mode: tcl; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- vim:fenc=utf-8:ft=tcl:et:sw=4:ts=4:sts=4
# $Id$

PortSystem          1.0

name                tesseract
version             3.01
revision            3
categories          textproc graphics pdf
platforms           darwin
maintainers         emer.net:emer
description         Open source OCR engine
long_description    The Tesseract OCR engine was one of the top 3 engines in \
                    the 1995 UNLV Accuracy test. Between 1995 and 2006 it had \
                    little work done on it, but it is probably one of the \
                    most accurate open source OCR engines available. The \
                    source code will read a binary, grey or color image and \
                    output text. A tiff reader is built in that will read \
                    uncompressed TIFF images, or libtiff can be added to read \
                    compressed images.

homepage            http://code.google.com/p/tesseract-ocr/
master_sites        googlecode:tesseract-ocr

checksums           rmd160  f254182870038068edb949714c3e989cd955e225 \
                    sha256  c24b0bd278291bc93ab242f93841c1d8743689c943bd804afbc5b898dc0a1c9b

# The main port builds the OCR engine itself. Every other subport is a
# language-data package (declared in the loop at the end of this file) that
# only needs the engine at run time and is excluded from livecheck.
if {${name} == ${subport}} {
    depends_build   port:autoconf \
                    port:automake \
                    port:libtool

    depends_lib     port:tiff \
                    port:zlib \
                    port:libpng \
                    port:leptonica \
                    port:jpeg

    # Create an m4 directory in the extracted sources before autogen.sh runs
    # (presumably expected by the autotools setup; confirm against upstream).
    post-extract {
        file mkdir ${worksrcpath}/m4
    }

    patchfiles      patch-configure.ac.diff \
                    patch-ccutil-strngs.h.diff

    # Regenerate the build system with the project's own autogen.sh.
    use_autoreconf  yes
    autoreconf.cmd  ./autogen.sh

    # Point configure at the leptonica headers installed under the prefix.
    configure.env-append \
                    LIBLEPT_HEADERSDIR=${prefix}/include/leptonica

    livecheck.regex "tesseract-ocr-(\[\\d\.\]*\\d)${extract.suffix}"
} else {
    depends_run     port:tesseract

    livecheck.type  none
}

# Language data table. Each entry consists of four consecutive elements:
#   tessdata language code, data version, human-readable name, checksums.
# No comments may appear inside the braces below: they would become list
# elements and shift every following entry.
set langs {
    ara 3.01 Arabic {
        rmd160  26ebacc37cff2a6386b7748f0fb62bd89fc55ebe \
        sha256  bc0781e71ae688eec172500e70b603cb5277785475c466a3704e5b9e5a805934
    }
    bul 3.00 Bulgarian {
        rmd160  cbcdb7532af8c03abb5f8bcdb5b34516b6a522e4 \
        sha256  fc3c650568d014f2337052658e9ddd8efaff6ac74c9a90952d740eafdf400e2d
    }
    cat 3.00 Catalan {
        rmd160  3b1a5067e97e4c104ee766fc89cc47c261b55b38 \
        sha256  dcc52db7522c67d2629117b7fde4c114ca51677f7f1efbedc12bf1e7319acde2
    }
    ces 3.00 Czech {
        rmd160  356bc1982be43cb89e1fa62b1af53a0f28d14444 \
        sha256  9848a4c137be45659e29f4aa07ccb6357fd4401307478cce0ed5f8e6686480d2
    }
    chi_sim 3.00 {Chinese (Simplified)} {
        rmd160  7a3e5f0f33f9869c10f793daee4883a5167eb178 \
        sha256  40c1cdf62106a0705807e8be5bee235a9e9efe716e22568e3885561a835d7d5b
    }
    chi_tra 3.00 {Chinese (Traditional)} {
        rmd160  590262a166a86f55177dde0dd0b10926a91c1ef4 \
        sha256  68ac2d83998b7f6e3fa26a58be8be5507e78185a16b3e106fe48a7b2667165fc
    }
    dan 3.00 Danish {
        rmd160  f354451e42486609ff631b5a3ba52134cc0a706f \
        sha256  2104d8b1cfde6fa960e31db826a101a356c77c3efeae6e78cab5a97e62f3410c
    }
    dan-frak 3.00 {Danish (Fraktur)} {
        rmd160  3e2da733cb9aea235b5deb6ec2bd89a68ef9e6f9 \
        sha256  ddc9f75b6104618d0682c9c98f5fed1ec52b1b0d5a54d38a20b20f0ab5d9a32b
    }
    deu 3.00 German {
        rmd160  f5e26562dfb3259c0c6fca0c9d1a23589f6af981 \
        sha256  9e1845a69d5b6cf93d4fa05d5b8934e7cfaf8b088e6be9e8dac30c7859591ccc
    }
    deu-frak 3.00 {German (Fraktur)} {
        rmd160  899d849c87ca4adf3a7303b0208cd41e0754eb63 \
        sha256  59f005f735a33039d416f699bff0bae7d42e5ab000dca6e1aae99d99f3776dc5
    }
    ell 3.00 Greek {
        rmd160  791acfb5a60875d02e23ebc8c66243adedb5cf39 \
        sha256  f8cb765c40733a677fa470370b935c34cfc53ba8de372b33e83ab59c7407195d
    }
    eng 3.01 English {
        rmd160  71fedcc390097e6f25437342be57bc2c800eb5a0 \
        sha256  89dceb9910dcfc21296645b289834269f9dd5ebeb819fe6b8d2f7fcbafd8c8b1
    }
    fin 3.00 Finnish {
        rmd160  a467774acf081aff2bd7fc5e1557eb1486336b6f \
        sha256  30c05d403295fb659048b5d82db0d87c9cbee5077314e2b52f6fdca9c9ec8520
    }
    fra 3.00 French {
        rmd160  97ab2fb064da5d7d15032fd1c5e87aa2f14dc1b3 \
        sha256  1d795f1da1ee20671ffbbf4b2997b424f459861b217c9ffa7abbeb72abd760d6
    }
    heb 3.01 Hebrew {
        rmd160  81326ae76cf8bb9ad222080787f976da32c9cb8f \
        sha256  d5cf025c43453bc5327b2631133b6695d85ad0f700546220fddb72929fb86fc1
    }
    heb-com 3.01 {Hebrew (community)} {
        rmd160  6433a5cbec6baf64b21a16bce60ca307f09f530b \
        sha256  20a9d53172a8dbf1a4bbd7f7e9e3a523a9435c47c751533642e8a9becc8584ff
    }
    hin 3.01 Hindi {
        rmd160  7c732c22c0c9027e43fa267538f73449fb8211d6 \
        sha256  e97e5d4fcbce0edbd5d3973d603efe7072db17d20e4e5243fe58803710f2a549
    }
    hun 3.00 Hungarian {
        rmd160  7acc341ed55ca61869c7876b8b37ac2a143abd5b \
        sha256  7b4d80703067fc5a8bf3da80f7da699f26a665a3e5ca93004fe353a1d6e74f60
    }
    ind 3.00 Indonesian {
        rmd160  bc714068a10982dcf32816e823dbca6be63489ee \
        sha256  9b2ee5ab2d33511f5fb8edbaee3d3a448cf8ab9efbf8e5c852d59481317e8218
    }
    ita 3.00 Italian {
        rmd160  522f4110223c0da1894d39bc49fd1400b1527e2e \
        sha256  a6cbaa39fddee521090e48d6a83b6f62e77b5fe7d90ab8ce9ee4d8ada10730f5
    }
    jpn 3.00 Japanese {
        rmd160  47dba0ff10e9bd6979a31fa1ccc79b7d84775432 \
        sha256  ebbde8c607a05cdd97d492734896a24b2aa17d5b4fb00e9597a96b39c1f22aa3
    }
    kor 3.00 Korean {
        rmd160  1aa22020b2bcdea7721d111065b2070191149e32 \
        sha256  8d4709341dbda7da9f42bb1d39c4d22d5ca767c2a30fbe36cb2ad152f092b01f
    }
    lav 3.00 Latvian {
        rmd160  eb1efa104597850e47cdae8fa70cc4f120959810 \
        sha256  305173b54d836dae2f22e4f488734440a83e683636c033cafe066083738ceebc
    }
    lit 3.00 Lithuanian {
        rmd160  65aab59e4be61c3734645f96e688dbd81aa384b1 \
        sha256  1a04f9e952a76d430c2b9d16140f2d42f27c72a9bbd55b3e8f2b6e701ef0f399
    }
    nld 3.00 Dutch {
        rmd160  4026b44d7849a0c78d1831e00805f985ffad6421 \
        sha256  2826628b0ff22fc3bb5d6e9d6901a39141b805b78084e0c9ab61f12c28747831
    }
    nor 3.00 Norwegian {
        rmd160  36ee419e1ba3a49db749f15c5b204bfaee0bc848 \
        sha256  c97cbfd93f676b331296b729d5526d1accc1325474a6b6d91260d03a1c862606
    }
    pol 3.00 Polish {
        rmd160  08197fede8151cfdef50a6d2e41c55f384d3f909 \
        sha256  708e2c59cc4e6451e90fc1ef57b83b809bc354b9e0ef3935e9f181d2a718d5ce
    }
    por 3.00 Portuguese {
        rmd160  90a73ffdf23c3ca9cc1b30d5f30943c67f4f59a0 \
        sha256  25df33e4f6c55749d04a5307f1143e31a71a76b7417b91a531c686bf49308b85
    }
    ron 3.00 Romanian {
        rmd160  9755d5002e6dfd581003168bae24bc0697c89318 \
        sha256  367ccb440283e57e4e3f87cd0b97a59a07ceb920e09da8778feafe5e086a9892
    }
    rus 3.00 Russian {
        rmd160  aeb8ab308499f2414c21f5299f2abf5082c6a282 \
        sha256  8ced9431d2b1d544fbdd362c36786e3804451da06093bc45acecad84829e5f7a
    }
    slk 3.00 Slovakian {
        rmd160  4e661fce45076128455f2b2d0e60b93d67a8038d \
        sha256  037839ad756b9177f7d8f7bd3f01a3a1012094360c5c62a49abbde0a33389511
    }
    slk-frak 3.01 {Slovakian (Fraktur)} {
        rmd160  4277eeb130de9ec42e6722fa34824116ef52def3 \
        sha256  f35b8a858d509c540a5829d94e8f8800ed25db4c904661000f4c4fa528d5c156
    }
    slv 3.00 Slovenian {
        rmd160  854b6ec39d09ec210a7850d56573f5a77da37b6e \
        sha256  bb7318f24972abc380688c67be86d07193e0294b252b60c648720978ad4a8b04
    }
    spa 3.00 Spanish {
        rmd160  92303810e9429ca5a9daa39e02a015a78ac09cd6 \
        sha256  5de0748b068f35c941e86bba622e23a376a6b084cb094007e7059714f1e030e0
    }
    srp 3.00 {Serbian (Latin)} {
        rmd160  70ef247a50a72c5d551f2f7dc246275b9096a9b8 \
        sha256  542b83f62389ff8cc34746dd765345566ab368b63dcf275c223ecb52c2cb9291
    }
    swe 3.00 Swedish {
        rmd160  7653dd8a57f75a9e240a8e256c7a4b20a2e91040 \
        sha256  25331ca1f41378a86336604476049810a0c5350417205e300cb2d11048cec2c1
    }
    swe-frak 3.00 {Swedish (Fraktur)} {
        rmd160  dd094a391eff0f62718655cf94eabdd5ff3d7a8f \
        sha256  2cf1a0e1eb8a0e3f8477ebcde44d99a9229f2c20b2d96ecb1ca6c25db5c6fdec
    }
    tgl 3.00 Tagalog {
        rmd160  669a4ea7bb6beac425070d8ba424f341eee447a9 \
        sha256  d4d88e1fb31771d0b42e65291c03da3a167541df7c9682b81d833a4bd6c9e8cf
    }
    tha 3.01 Thai {
        rmd160  aad3a39e2ec68e71870031a88e77b7ec426befc1 \
        sha256  5f8af525ebdfa789e0741351e808858e1a613b895becb138352d51b9fbd31322
    }
    tur 3.00 Turkish {
        rmd160  15cebea216b539681d7306cab3f31f2773cd93e2 \
        sha256  cb4e187f6c25c145252c88a576f3e3c9204c1d77e1c012172a85980c5c01088b
    }
    ukr 3.00 Ukrainian {
        rmd160  0f040ec98453c38a0f19e90d7f34ab0dee8e9778 \
        sha256  a54f0ce0843c863f102bfe135939200fc18702ce61ae6ebc571ab49460849365
    }
    vie 3.00 Vietnamese {
        rmd160  74826cef758cf5a8c561c2e6381e053e151a6a91 \
        sha256  5f61c32daf9a7071ff0dc95415aed75276538813398e3debb3849bba70bda713
    }
}

# Declare one subport per language entry, named after the main port plus the
# language code with underscores replaced by hyphens (e.g. chi_sim becomes
# tesseract-chi-sim).
#
# The subport body is passed through subst before being evaluated, so the loop
# variables, the bracketed strsed / string trim calls, and the port variables
# referenced in the body (name, workpath, destroot, prefix) are all expanded
# into the body text at this point. Do not add comments inside the subst body:
# any dollar sign or bracket in a comment there would be substituted too.
#
# Version 3.00 data uses a distfile with a .gz suffix whose extract output is
# redirected to tessdata/<code>.traineddata inside a directory created in
# pre-extract; other versions use a tesseract-ocr-<version>.<code> distfile
# with the default extract settings. Either way, destroot copies the tessdata
# directory into share/ under the prefix.
foreach {lang_code lang_version lang_name lang_checksums} ${langs} {
    eval [subst {
        subport ${name}-[strsed ${lang_code} {g/_/-/}] {
            version             ${lang_version}
            description         ${lang_name} language data for the Tesseract OCR engine
            long_description    ${lang_name} language data for the Tesseract OCR engine
            supported_archs     noarch

            if {${lang_version} == 3.00} {
                distname        ${lang_code}.traineddata
                dist_subdir     ${name}/3.00_0
                pre-extract {
                    xinstall -d ${workpath}/tesseract-ocr/tessdata
                }
                extract.suffix  .gz
                extract.post_args > ${workpath}/tesseract-ocr/tessdata/${lang_code}.traineddata
            } else {
                distname        tesseract-ocr-${lang_version}.${lang_code}
                dist_subdir     ${name}
            }

            checksums           [string trim ${lang_checksums}]

            use_configure       no
            build               {}

            destroot {
                copy ${workpath}/tesseract-ocr/tessdata ${destroot}${prefix}/share/
            }
        }
    }]
}