トップ
新規
一覧
単語検索
最終更新
ヘルプ
ログイン
アールメカブ
hidden_C_CharCode
をテンプレートにして作成
開始行:
[[Programming]]
[[ここ:http://kazmuzik.net/lj/68924.html]]より引用
languageIdentifier (C Language)
私が Java で書いた EncodingIdentifier を、C言語にポーティ...
% cat encodingIdentifier.h
#include <iconv.h>
// Creates an identifier and returns it as an opaque handle.
extern void *encodingIdentifier_Init() ;
// Releases a handle previously returned by encodingIdentifier_Init().
extern void encodingIdentifier_Done(void *handle) ;
// Returns the name of the candidate encoding that scores best for the input.
extern char *encodingIdentifier_Detect(void *handle, char...
% cat encodingIdentifier.c
#include "encodingIdentifier.h"
#include <stdlib.h>
// State behind the opaque handle passed between the encodingIdentifier_*
// functions. encodings[] and iconvs[] are parallel arrays of nEncoding entries.
struct _encodingIdentifier {
int weightProcess; // multiplier for the processed count in the score (Init sets 1)
int weightError; // multiplier for the error count in the score (Init sets 4)
int nEncoding; // number of candidate encodings (Init sets 6)
char **encodings; // candidate encoding names
iconv_t *iconvs; // one iconv descriptor per candidate encoding
};
// Allocates an identifier, sets the scoring weights, fills in the six
// candidate encoding names and opens one iconv descriptor per candidate with
// "UTF-16" as the target encoding.
// Returns the identifier as an opaque pointer; pass it to
// encodingIdentifier_Done() to release it.
// NOTE(review): no check of the malloc() results is visible before they are
// dereferenced, and the iconv_open() result is stored without a visible
// failure check -- confirm whether these need handling.
void *encodingIdentifier_Init() {
struct _encodingIdentifier *identifier = malloc(sizeof(...
identifier->weightProcess = 1;
identifier->weightError = 4;
identifier->nEncoding = 6;
identifier->encodings = malloc(sizeof(char*) * 6);
identifier->iconvs = malloc(sizeof(iconv_t) * 6);
// The names are string literals, so only the array holding them is heap-owned.
identifier->encodings[0] = "US-ASCII";
identifier->encodings[1] = "ISO-2022-JP";
identifier->encodings[2] = "ISO-8859-1";
identifier->encodings[3] = "UTF-8";
identifier->encodings[4] = "EUC-JP";
identifier->encodings[5] = "Shift-JIS";
int i;
// The literal 6 here repeats the value stored in nEncoding above.
for (i = 0; i < 6; i++) {
// Source encoding argument is cut off in this listing; presumably
// identifier->encodings[i] -- TODO confirm.
identifier->iconvs[i] = iconv_open("UTF-16", identifi...
}
return identifier;
}
// Releases everything owned by a handle returned from
// encodingIdentifier_Init(): each open iconv descriptor, the descriptor
// array, the encoding-name array and the identifier structure itself.
// The name strings are not freed (Init stores string literals).
//
// handle: pointer obtained from encodingIdentifier_Init(). NULL is accepted
//         and ignored, in the same way free(NULL) is a no-op.
void encodingIdentifier_Done(void *handle) {
    struct _encodingIdentifier *identifier = handle;
    int i;

    if (identifier == NULL) {
        return;
    }

    if (identifier->iconvs != NULL) {
        for (i = 0; i < identifier->nEncoding; i++) {
            // iconv_open() signals failure with (iconv_t)-1; that value is
            // not a descriptor and must not be passed to iconv_close().
            if (identifier->iconvs[i] != (iconv_t)-1) {
                iconv_close(identifier->iconvs[i]);
            }
        }
    }

    free(identifier->iconvs);
    free(identifier->encodings);
    free(identifier);
}
// Scores the input against every candidate encoding and returns the name of
// the highest-scoring one. On equal scores the earliest candidate wins,
// because the comparison below is a strict '>'.
// The returned pointer is an entry of identifier->encodings, not a fresh
// allocation. It stays NULL if no score exceeds the initial threshold.
// NOTE(review): encodingIdentifier_score() is defined after this function and
// no prototype for it is visible before this point -- confirm one exists.
char *encodingIdentifier_Detect(void *handle, char *buffe...
struct _encodingIdentifier *identifier = handle;
// Starting threshold: -(length * weightError) - 1. Presumably one below the
// worst score achievable (every byte counted as an error) -- TODO confirm.
int maxScore = - length * identifier->weightError - 1;
char *encoding = NULL;
int i, score;
// printf("%s %d %d\n", buffer, offset, length);
for (i = 0; i < identifier->nEncoding; i++) {
score = encodingIdentifier_score(identifier->iconvs[i...
identifier->weightPr...
// printf("%d %s %d\n", i, identifier->encodings[i], ...
if (score > maxScore) {
maxScore = score;
encoding = identifier->encodings[i];
}
}
return encoding;
}
// Feeds length bytes starting at buffer + offset through the iconv
// descriptor cd and returns nProcess * weightProcess - nError * weightError.
// The converted output is written to a scratch buffer and discarded.
int encodingIdentifier_score(iconv_t cd, char *buffer, in...
int weightProcess, int weigh...
// Scratch output: two bytes per input byte.
// NOTE(review): the malloc() result is used without a NULL check.
char *utf16Buff = malloc(sizeof(char) * length * 2);
int nError = 0;
int nProcess = 0;
char *inbuf = buffer + offset;
size_t inLeft = length;
char *outbuf = utf16Buff;
size_t outLeft = length * 2;
size_t n;
while (1) {
// NOTE(review): n is assigned but never read, so every return that leaves
// input unconsumed is counted as one error regardless of errno -- confirm
// that is intended (e.g. for an output-buffer-full condition).
n = iconv(cd, &inbuf, &inLeft, &outbuf, &outLeft);
if (inLeft == 0) {
break;
}
// Input remains: count one error, then step over one input byte and two
// output bytes before retrying.
nError += 1;
inbuf += 1;
inLeft -= 1;
outbuf += 2;
// NOTE(review): outLeft is size_t; this wraps around if fewer than two
// bytes remain -- confirm that cannot happen.
outLeft -= 2;
}
// Only the local counters are used below, so the buffer can go now.
free(utf16Buff);
// nProcess = length - (length * 2 - outLeft) / 2;
// nProcess is half of the output space left unused.
// NOTE(review): this differs from the commented-out formula above -- confirm
// which one is intended.
nProcess = outLeft / 2;
return nProcess * weightProcess - nError * weightError;
}
% cat test1.c
#include <stdio.h>
#include <string.h>
#include "encodingIdentifier.h"
// Test driver: reads one line from standard input, asks the identifier which
// encoding it looks like, and prints the resulting name.
int main(int argc, char **argv) {
void *handle = encodingIdentifier_Init();
// NOTE(review): malloc() is called here, but <stdlib.h> is not among the
// includes shown for test1.c -- confirm it is declared. The buffer is also
// never freed before main returns.
char *s = malloc(sizeof(char) * 8192);
// NOTE(review): gets() places no bound on the input, so a line longer than
// the 8192-byte buffer overruns it; fgets() takes a size.
gets(s);
// printf("%s %d\n", s, strlen(s));
char *encoding = encodingIdentifier_Detect(handle, s, 0...
// NOTE(review): encodingIdentifier_Detect() can return NULL, which would be
// passed to %s here -- confirm.
printf("%s\n", encoding);
encodingIdentifier_Done(handle);
}
% gcc -o test1 test1.c encodingIdentifier.c
% echo "今日は良い天気です。" | ./test1
UTF-8
%
終了行:
[[Programming]]
[[ここ:http://kazmuzik.net/lj/68924.html]]より引用
languageIdentifier (C Language)
私が Java で書いた EncodingIdentifier を、C言語にポーティ...
% cat encodingIdentifier.h
#include <iconv.h>
// Creates an identifier and returns it as an opaque handle.
extern void *encodingIdentifier_Init() ;
// Releases a handle previously returned by encodingIdentifier_Init().
extern void encodingIdentifier_Done(void *handle) ;
// Returns the name of the candidate encoding that scores best for the input.
extern char *encodingIdentifier_Detect(void *handle, char...
% cat encodingIdentifier.c
#include "encodingIdentifier.h"
#include <stdlib.h>
// State behind the opaque handle passed between the encodingIdentifier_*
// functions. encodings[] and iconvs[] are parallel arrays of nEncoding entries.
struct _encodingIdentifier {
int weightProcess; // multiplier for the processed count in the score (Init sets 1)
int weightError; // multiplier for the error count in the score (Init sets 4)
int nEncoding; // number of candidate encodings (Init sets 6)
char **encodings; // candidate encoding names
iconv_t *iconvs; // one iconv descriptor per candidate encoding
};
// Allocates an identifier, sets the scoring weights, fills in the six
// candidate encoding names and opens one iconv descriptor per candidate with
// "UTF-16" as the target encoding.
// Returns the identifier as an opaque pointer; pass it to
// encodingIdentifier_Done() to release it.
// NOTE(review): no check of the malloc() results is visible before they are
// dereferenced, and the iconv_open() result is stored without a visible
// failure check -- confirm whether these need handling.
void *encodingIdentifier_Init() {
struct _encodingIdentifier *identifier = malloc(sizeof(...
identifier->weightProcess = 1;
identifier->weightError = 4;
identifier->nEncoding = 6;
identifier->encodings = malloc(sizeof(char*) * 6);
identifier->iconvs = malloc(sizeof(iconv_t) * 6);
// The names are string literals, so only the array holding them is heap-owned.
identifier->encodings[0] = "US-ASCII";
identifier->encodings[1] = "ISO-2022-JP";
identifier->encodings[2] = "ISO-8859-1";
identifier->encodings[3] = "UTF-8";
identifier->encodings[4] = "EUC-JP";
identifier->encodings[5] = "Shift-JIS";
int i;
// The literal 6 here repeats the value stored in nEncoding above.
for (i = 0; i < 6; i++) {
// Source encoding argument is cut off in this listing; presumably
// identifier->encodings[i] -- TODO confirm.
identifier->iconvs[i] = iconv_open("UTF-16", identifi...
}
return identifier;
}
// Releases everything owned by a handle returned from
// encodingIdentifier_Init(): each open iconv descriptor, the descriptor
// array, the encoding-name array and the identifier structure itself.
// The name strings are not freed (Init stores string literals).
//
// handle: pointer obtained from encodingIdentifier_Init(). NULL is accepted
//         and ignored, in the same way free(NULL) is a no-op.
void encodingIdentifier_Done(void *handle) {
    struct _encodingIdentifier *identifier = handle;
    int i;

    if (identifier == NULL) {
        return;
    }

    if (identifier->iconvs != NULL) {
        for (i = 0; i < identifier->nEncoding; i++) {
            // iconv_open() signals failure with (iconv_t)-1; that value is
            // not a descriptor and must not be passed to iconv_close().
            if (identifier->iconvs[i] != (iconv_t)-1) {
                iconv_close(identifier->iconvs[i]);
            }
        }
    }

    free(identifier->iconvs);
    free(identifier->encodings);
    free(identifier);
}
// Scores the input against every candidate encoding and returns the name of
// the highest-scoring one. On equal scores the earliest candidate wins,
// because the comparison below is a strict '>'.
// The returned pointer is an entry of identifier->encodings, not a fresh
// allocation. It stays NULL if no score exceeds the initial threshold.
// NOTE(review): encodingIdentifier_score() is defined after this function and
// no prototype for it is visible before this point -- confirm one exists.
char *encodingIdentifier_Detect(void *handle, char *buffe...
struct _encodingIdentifier *identifier = handle;
// Starting threshold: -(length * weightError) - 1. Presumably one below the
// worst score achievable (every byte counted as an error) -- TODO confirm.
int maxScore = - length * identifier->weightError - 1;
char *encoding = NULL;
int i, score;
// printf("%s %d %d\n", buffer, offset, length);
for (i = 0; i < identifier->nEncoding; i++) {
score = encodingIdentifier_score(identifier->iconvs[i...
identifier->weightPr...
// printf("%d %s %d\n", i, identifier->encodings[i], ...
if (score > maxScore) {
maxScore = score;
encoding = identifier->encodings[i];
}
}
return encoding;
}
// Feeds length bytes starting at buffer + offset through the iconv
// descriptor cd and returns nProcess * weightProcess - nError * weightError.
// The converted output is written to a scratch buffer and discarded.
int encodingIdentifier_score(iconv_t cd, char *buffer, in...
int weightProcess, int weigh...
// Scratch output: two bytes per input byte.
// NOTE(review): the malloc() result is used without a NULL check.
char *utf16Buff = malloc(sizeof(char) * length * 2);
int nError = 0;
int nProcess = 0;
char *inbuf = buffer + offset;
size_t inLeft = length;
char *outbuf = utf16Buff;
size_t outLeft = length * 2;
size_t n;
while (1) {
// NOTE(review): n is assigned but never read, so every return that leaves
// input unconsumed is counted as one error regardless of errno -- confirm
// that is intended (e.g. for an output-buffer-full condition).
n = iconv(cd, &inbuf, &inLeft, &outbuf, &outLeft);
if (inLeft == 0) {
break;
}
// Input remains: count one error, then step over one input byte and two
// output bytes before retrying.
nError += 1;
inbuf += 1;
inLeft -= 1;
outbuf += 2;
// NOTE(review): outLeft is size_t; this wraps around if fewer than two
// bytes remain -- confirm that cannot happen.
outLeft -= 2;
}
// Only the local counters are used below, so the buffer can go now.
free(utf16Buff);
// nProcess = length - (length * 2 - outLeft) / 2;
// nProcess is half of the output space left unused.
// NOTE(review): this differs from the commented-out formula above -- confirm
// which one is intended.
nProcess = outLeft / 2;
return nProcess * weightProcess - nError * weightError;
}
% cat test1.c
#include <stdio.h>
#include <string.h>
#include "encodingIdentifier.h"
// Test driver: reads one line from standard input, asks the identifier which
// encoding it looks like, and prints the resulting name.
int main(int argc, char **argv) {
void *handle = encodingIdentifier_Init();
// NOTE(review): malloc() is called here, but <stdlib.h> is not among the
// includes shown for test1.c -- confirm it is declared. The buffer is also
// never freed before main returns.
char *s = malloc(sizeof(char) * 8192);
// NOTE(review): gets() places no bound on the input, so a line longer than
// the 8192-byte buffer overruns it; fgets() takes a size.
gets(s);
// printf("%s %d\n", s, strlen(s));
char *encoding = encodingIdentifier_Detect(handle, s, 0...
// NOTE(review): encodingIdentifier_Detect() can return NULL, which would be
// passed to %s here -- confirm.
printf("%s\n", encoding);
encodingIdentifier_Done(handle);
}
% gcc -o test1 test1.c encodingIdentifier.c
% echo "今日は良い天気です。" | ./test1
UTF-8
%
ページ名: