tag_id3.c 12.6 KB
Newer Older
1
/*
Max Kellermann's avatar
Max Kellermann committed
2
 * Copyright (C) 2003-2011 The Music Player Daemon Project
3
 * http://www.musicpd.org
4 5 6 7 8 9 10 11 12 13
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
14 15 16 17
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 19
 */

20
#include "config.h"
21 22
#include "tag_id3.h"
#include "tag.h"
23
#include "riff.h"
24
#include "aiff.h"
25 26
#include "conf.h"

27
#include <glib.h>
28 29
#include <id3tag.h>

30
#include <stdio.h>
31
#include <stdlib.h>
32 33 34
#include <errno.h>
#include <string.h>

35 36 37
/* All GLib log calls in this file are emitted under the "id3" domain. */
#undef G_LOG_DOMAIN
#define G_LOG_DOMAIN "id3"

/*
 * Fallback definitions for frame identifiers; each one is only defined
 * here when an earlier header has not already provided it.
 */
#ifndef ID3_FRAME_COMPOSER
#define ID3_FRAME_COMPOSER "TCOM"
#endif

#ifndef ID3_FRAME_DISC
#define ID3_FRAME_DISC "TPOS"
#endif

#ifndef ID3_FRAME_ARTIST_SORT
#define ID3_FRAME_ARTIST_SORT "TSOP"
#endif

#ifndef ID3_FRAME_ALBUM_ARTIST_SORT
#define ID3_FRAME_ALBUM_ARTIST_SORT "TSO2" /* this one is unofficial, introduced by Itunes */
#endif

#ifndef ID3_FRAME_ALBUM_ARTIST
#define ID3_FRAME_ALBUM_ARTIST "TPE2"
#endif

57 58 59 60 61 62
/**
 * Check whether the ID3_TAG_OPTION_ID3V1 bit is set in the option word
 * that id3_tag_options() reports for this tag.
 *
 * The call passes 0 for both remaining arguments; presumably that makes
 * it a pure query which leaves the tag's options untouched (confirm
 * against the libid3tag documentation).
 */
static inline bool
tag_is_id3v1(struct id3_tag *tag)
{
	const int options = id3_tag_options(tag, 0, 0);

	if (options & ID3_TAG_OPTION_ID3V1)
		return true;

	return false;
}

63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79
/**
 * Fetch field number i of the given frame as a string and return a
 * UTF-8 duplicate of it.
 *
 * @return the duplicate produced by id3_ucs4_utf8duplicate() (the
 * callers in this file release it with free()), or NULL if the frame
 * has no such field or the field yields no string
 */
static id3_utf8_t *
tag_id3_getstring(const struct id3_frame *frame, unsigned i)
{
	union id3_field *const slot = id3_frame_field(frame, i);
	const id3_ucs4_t *text = NULL;

	if (slot != NULL)
		text = id3_field_getstring(slot);

	return text != NULL ? id3_ucs4_utf8duplicate(text) : NULL;
}

80 81
/* This will try to convert a string to utf-8,
 */
Max Kellermann's avatar
Max Kellermann committed
82
static id3_utf8_t *
83
import_id3_string(bool is_id3v1, const id3_ucs4_t *ucs4)
84
{
85
	id3_utf8_t *utf8, *utf8_stripped;
86
	id3_latin1_t *isostr;
87
	const char *encoding;
88

89 90
	/* use encoding field here? */
	if (is_id3v1 &&
91
	    (encoding = config_get_string(CONF_ID3V1_ENCODING, NULL)) != NULL) {
92
		isostr = id3_ucs4_latin1duplicate(ucs4);
93
		if (G_UNLIKELY(!isostr)) {
94 95
			return NULL;
		}
96 97 98

		utf8 = (id3_utf8_t *)
			g_convert_with_fallback((const char*)isostr, -1,
99
						"utf-8", encoding,
100
						NULL, NULL, NULL, NULL);
101
		if (utf8 == NULL) {
102 103
			g_debug("Unable to convert %s string to UTF-8: '%s'",
				encoding, isostr);
104
			g_free(isostr);
105 106
			return NULL;
		}
107
		g_free(isostr);
108 109
	} else {
		utf8 = id3_ucs4_utf8duplicate(ucs4);
110
		if (G_UNLIKELY(!utf8)) {
111 112 113
			return NULL;
		}
	}
114 115

	utf8_stripped = (id3_utf8_t *)g_strdup(g_strstrip((gchar *)utf8));
116
	g_free(utf8);
117 118

	return utf8_stripped;
119 120
}

121 122 123 124 125 126 127
/**
 * Import a "Text information frame" (ID3v2.4.0 section 4.2).  It
 * contains 2 fields:
 *
 * - encoding
 * - string list
 */
128
static void
129 130 131
tag_id3_import_text_frame(struct tag *dest, struct id3_tag *tag,
			  const struct id3_frame *frame,
			  enum tag_type type)
132 133 134 135 136 137
{
	id3_ucs4_t const *ucs4;
	id3_utf8_t *utf8;
	union id3_field const *field;
	unsigned int nstrings, i;

138
	if (frame->nfields != 2)
139
		return;
140 141 142 143 144

	/* check the encoding field */

	field = id3_frame_field(frame, 0);
	if (field == NULL || field->type != ID3_FIELD_TYPE_TEXTENCODING)
145
		return;
146

147 148 149 150 151 152 153 154 155 156 157 158 159
	/* process the value(s) */

	field = id3_frame_field(frame, 1);
	if (field == NULL || field->type != ID3_FIELD_TYPE_STRINGLIST)
		return;

	/* Get the number of strings available */
	nstrings = id3_field_getnstrings(field);
	for (i = 0; i < nstrings; i++) {
		ucs4 = id3_field_getstrings(field, i);
		if (ucs4 == NULL)
			continue;

160
		if (type == TAG_GENRE)
161 162 163
			ucs4 = id3_genre_name(ucs4);

		utf8 = import_id3_string(tag_is_id3v1(tag), ucs4);
164 165 166 167 168
		if (utf8 == NULL)
			continue;

		tag_add_item(dest, type, (char *)utf8);
		g_free(utf8);
169
	}
170 171
}

172 173 174 175 176 177 178 179 180 181 182 183 184 185
/**
 * Walk over every frame of the ID3 tag which carries the given
 * identifier and hand each one to tag_id3_import_text_frame()
 * (ID3v2.4.0 section 4.2, "Text information frames").
 *
 * @param dest the tag object receiving the items
 * @param tag the ID3 tag to be searched
 * @param id the frame identifier to look for
 * @param type the tag type under which the strings are stored
 */
static void
tag_id3_import_text(struct tag *dest, struct id3_tag *tag, const char *id,
		    enum tag_type type)
{
	unsigned index = 0;

	for (;;) {
		const struct id3_frame *match =
			id3_tag_findframe(tag, id, index);
		if (match == NULL)
			return;

		tag_id3_import_text_frame(dest, tag, match, type);
		++index;
	}
}

186 187 188 189 190 191 192 193 194 195
/**
 * Import a "Comment frame" (ID3v2.4.0 section 4.10).  It
 * contains 4 fields:
 *
 * - encoding
 * - language
 * - string
 * - full string (we use this one)
 */
static void
196 197 198
tag_id3_import_comment_frame(struct tag *dest, struct id3_tag *tag,
			     const struct id3_frame *frame,
			     enum tag_type type)
199 200 201 202 203
{
	id3_ucs4_t const *ucs4;
	id3_utf8_t *utf8;
	union id3_field const *field;

204
	if (frame->nfields != 4)
205 206 207 208 209 210 211 212 213 214 215
		return;

	/* for now I only read the 4th field, with the fullstring */
	field = id3_frame_field(frame, 3);
	if (field == NULL)
		return;

	ucs4 = id3_field_getfullstring(field);
	if (ucs4 == NULL)
		return;

216
	utf8 = import_id3_string(tag_is_id3v1(tag), ucs4);
217 218 219 220 221
	if (utf8 == NULL)
		return;

	tag_add_item(dest, type, (char *)utf8);
	g_free(utf8);
222 223
}

224 225 226 227 228 229 230 231 232 233 234 235 236 237
/**
 * Walk over every frame of the ID3 tag which carries the given
 * identifier and hand each one to tag_id3_import_comment_frame()
 * (ID3v2.4.0 section 4.10, "Comments").
 *
 * @param dest the tag object receiving the items
 * @param tag the ID3 tag to be searched
 * @param id the frame identifier to look for
 * @param type the tag type under which the comments are stored
 */
static void
tag_id3_import_comment(struct tag *dest, struct id3_tag *tag, const char *id,
		       enum tag_type type)
{
	unsigned index = 0;

	for (;;) {
		const struct id3_frame *match =
			id3_tag_findframe(tag, id, index);
		if (match == NULL)
			return;

		tag_id3_import_comment_frame(dest, tag, match, type);
		++index;
	}
}

238 239 240 241 242 243 244 245 246 247 248
/**
 * Parse a TXXX name, and convert it to a tag_type enum value.
 * Returns TAG_NUM_OF_ITEM_TYPES if the TXXX name is not understood.
 */
static enum tag_type
tag_id3_parse_txxx_name(const char *name)
{
	static const struct {
		enum tag_type type;
		const char *name;
	} musicbrainz_txxx[] = {
249
		{ TAG_ALBUM_ARTIST_SORT, "ALBUMARTISTSORT" },
250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297
		{ TAG_MUSICBRAINZ_ARTISTID, "MusicBrainz Artist Id" },
		{ TAG_MUSICBRAINZ_ALBUMID, "MusicBrainz Album Id" },
		{ TAG_MUSICBRAINZ_ALBUMARTISTID,
		  "MusicBrainz Album Artist Id" },
		{ TAG_MUSICBRAINZ_TRACKID, "MusicBrainz Track Id" },
	};

	for (unsigned i = 0; i < G_N_ELEMENTS(musicbrainz_txxx); ++i)
		if (strcmp(name, musicbrainz_txxx[i].name) == 0)
			return musicbrainz_txxx[i].type;

	return TAG_NUM_OF_ITEM_TYPES;
}

/**
 * Import all known MusicBrainz tags from TXXX frames.
 */
static void
tag_id3_import_musicbrainz(struct tag *mpd_tag, struct id3_tag *id3_tag)
{
	for (unsigned i = 0;; ++i) {
		const struct id3_frame *frame;
		id3_utf8_t *name, *value;
		enum tag_type type;

		frame = id3_tag_findframe(id3_tag, "TXXX", i);
		if (frame == NULL)
			break;

		name = tag_id3_getstring(frame, 1);
		if (name == NULL)
			continue;

		type = tag_id3_parse_txxx_name((const char*)name);
		free(name);

		if (type == TAG_NUM_OF_ITEM_TYPES)
			continue;

		value = tag_id3_getstring(frame, 2);
		if (value == NULL)
			continue;

		tag_add_item(mpd_tag, type, (const char*)value);
		free(value);
	}
}

298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336
/**
 * Scan all UFID frames of the ID3 tag.  For each frame whose first
 * field equals "http://musicbrainz.org", the binary data of the second
 * field is added to the MPD tag as TAG_MUSICBRAINZ_TRACKID.  Frames
 * with missing fields, a different first field or empty data are
 * skipped.
 */
static void
tag_id3_import_ufid(struct tag *mpd_tag, struct id3_tag *id3_tag)
{
	const struct id3_frame *ufid;
	unsigned index = 0;

	while ((ufid = id3_tag_findframe(id3_tag, "UFID", index++)) != NULL) {
		union id3_field *owner_field = id3_frame_field(ufid, 0);
		if (owner_field == NULL)
			continue;

		const id3_latin1_t *owner = id3_field_getlatin1(owner_field);
		if (owner == NULL)
			continue;

		if (strcmp((const char *)owner, "http://musicbrainz.org") != 0)
			continue;

		union id3_field *data_field = id3_frame_field(ufid, 1);
		if (data_field == NULL)
			continue;

		id3_length_t nbytes;
		const id3_byte_t *data =
			id3_field_getbinarydata(data_field, &nbytes);
		if (data == NULL || nbytes == 0)
			continue;

		/* the data is passed with an explicit length */
		tag_add_item_n(mpd_tag, TAG_MUSICBRAINZ_TRACKID,
			       (const char*)data, nbytes);
	}
}

337 338
struct tag *tag_id3_import(struct id3_tag * tag)
{
339 340
	struct tag *ret = tag_new();

341
	tag_id3_import_text(ret, tag, ID3_FRAME_ARTIST, TAG_ARTIST);
342
	tag_id3_import_text(ret, tag, ID3_FRAME_ALBUM_ARTIST,
343
			    TAG_ALBUM_ARTIST);
344 345 346 347
	tag_id3_import_text(ret, tag, ID3_FRAME_ARTIST_SORT,
			    TAG_ARTIST_SORT);
	tag_id3_import_text(ret, tag, ID3_FRAME_ALBUM_ARTIST_SORT,
			    TAG_ALBUM_ARTIST_SORT);
348 349 350 351 352 353 354 355 356 357
	tag_id3_import_text(ret, tag, ID3_FRAME_TITLE, TAG_TITLE);
	tag_id3_import_text(ret, tag, ID3_FRAME_ALBUM, TAG_ALBUM);
	tag_id3_import_text(ret, tag, ID3_FRAME_TRACK, TAG_TRACK);
	tag_id3_import_text(ret, tag, ID3_FRAME_YEAR, TAG_DATE);
	tag_id3_import_text(ret, tag, ID3_FRAME_GENRE, TAG_GENRE);
	tag_id3_import_text(ret, tag, ID3_FRAME_COMPOSER, TAG_COMPOSER);
	tag_id3_import_text(ret, tag, "TPE3", TAG_PERFORMER);
	tag_id3_import_text(ret, tag, "TPE4", TAG_PERFORMER);
	tag_id3_import_comment(ret, tag, ID3_FRAME_COMMENT, TAG_COMMENT);
	tag_id3_import_text(ret, tag, ID3_FRAME_DISC, TAG_DISC);
358

359
	tag_id3_import_musicbrainz(ret, tag);
360
	tag_id3_import_ufid(ret, tag);
361

362 363 364 365
	if (tag_is_empty(ret)) {
		tag_free(ret);
		ret = NULL;
	}
366 367 368 369

	return ret;
}

Max Kellermann's avatar
Max Kellermann committed
370 371
/**
 * Seek to the given position and read up to "size" bytes into "buf".
 *
 * @param buf destination buffer with room for at least "size" bytes
 * @param size the maximum number of bytes to read
 * @param stream the stream to read from
 * @param offset,whence the position, passed on to fseek()
 * @return the number of bytes actually read, which may be less than
 * "size"; 0 if seeking failed or nothing could be read
 */
static int
fill_buffer(void *buf, size_t size, FILE *stream, long offset, int whence)
{
	if (fseek(stream, offset, whence) != 0)
		return 0;

	/* fread() counts in size_t; this function reports the count as int */
	return (int)fread(buf, 1, size, stream);
}

Max Kellermann's avatar
Max Kellermann committed
377 378
static int
get_id3v2_footer_size(FILE *stream, long offset, int whence)
379 380 381 382
{
	id3_byte_t buf[ID3_TAG_QUERYSIZE];
	int bufsize;

Max Kellermann's avatar
Max Kellermann committed
383
	bufsize = fill_buffer(buf, ID3_TAG_QUERYSIZE, stream, offset, whence);
384 385 386 387
	if (bufsize <= 0) return 0;
	return id3_tag_query(buf, bufsize);
}

Max Kellermann's avatar
Max Kellermann committed
388 389
static struct id3_tag *
tag_id3_read(FILE *stream, long offset, int whence)
390 391
{
	struct id3_tag *tag;
Max Kellermann's avatar
Max Kellermann committed
392 393 394 395 396
	id3_byte_t query_buffer[ID3_TAG_QUERYSIZE];
	id3_byte_t *tag_buffer;
	int tag_size;
	int query_buffer_size;
	int tag_buffer_size;
397 398

	/* It's ok if we get less than we asked for */
Max Kellermann's avatar
Max Kellermann committed
399 400 401
	query_buffer_size = fill_buffer(query_buffer, ID3_TAG_QUERYSIZE,
				   stream, offset, whence);
	if (query_buffer_size <= 0) return NULL;
402 403

	/* Look for a tag header */
Max Kellermann's avatar
Max Kellermann committed
404 405
	tag_size = id3_tag_query(query_buffer, query_buffer_size);
	if (tag_size <= 0) return NULL;
406 407

	/* Found a tag.  Allocate a buffer and read it in. */
Max Kellermann's avatar
Max Kellermann committed
408 409
	tag_buffer = g_malloc(tag_size);
	if (!tag_buffer) return NULL;
410

Max Kellermann's avatar
Max Kellermann committed
411 412 413
	tag_buffer_size = fill_buffer(tag_buffer, tag_size, stream, offset, whence);
	if (tag_buffer_size < tag_size) {
		g_free(tag_buffer);
414 415 416
		return NULL;
	}

Max Kellermann's avatar
Max Kellermann committed
417
	tag = id3_tag_parse(tag_buffer, tag_buffer_size);
418

Max Kellermann's avatar
Max Kellermann committed
419
	g_free(tag_buffer);
420 421 422 423

	return tag;
}

Max Kellermann's avatar
Max Kellermann committed
424 425
/**
 * Look for an ID3v2 tag at the beginning of the stream and follow its
 * SEEK frames, if any.
 *
 * As long as the current tag contains a SEEK frame, the tag it refers
 * to is read and replaces the current one.  The chain ends when there
 * is no SEEK frame, its value is unusable, or the referenced tag cannot
 * be read or is an ID3v1 tag; the last good tag is returned then.
 *
 * @return the tag (to be released with id3_tag_delete()), or NULL if
 * the stream does not start with a tag or starts with an ID3v1 tag
 */
static struct id3_tag *
tag_id3_find_from_beginning(FILE *stream)
{
	struct id3_tag *tag;
	struct id3_tag *seektag;
	struct id3_frame *frame;
	union id3_field *field;
	int seek;

	tag = tag_id3_read(stream, 0, SEEK_SET);
	if (!tag) {
		return NULL;
	} else if (tag_is_id3v1(tag)) {
		/* id3v1 tags don't belong here */
		id3_tag_delete(tag);
		return NULL;
	}

	/* We have an id3v2 tag, so let's look for SEEK frames */
	while ((frame = id3_tag_findframe(tag, "SEEK", 0))) {
		/* Found a SEEK frame, get its value */
		field = id3_frame_field(frame, 0);
		if (field == NULL)
			/* a frame without fields carries no offset */
			break;

		seek = id3_field_getint(field);
		if (seek < 0)
			break;

		/* Get the tag specified by the SEEK frame */
		seektag = tag_id3_read(stream, seek, SEEK_CUR);
		if (!seektag)
			break;

		if (tag_is_id3v1(seektag)) {
			/* not usable here; release it instead of
			   leaking it */
			id3_tag_delete(seektag);
			break;
		}

		/* Replace the old tag with the new one */
		id3_tag_delete(tag);
		tag = seektag;
	}

	return tag;
}

Max Kellermann's avatar
Max Kellermann committed
461 462
/**
 * Look for ID3 tags at the end of the stream.
 *
 * An ID3v1 tag is expected in the last 128 bytes.  Directly in front of
 * it (or at the very end if there is no ID3v1 tag), the 10 byte footer
 * of an appended ID3v2 tag is looked for; if one is found, the ID3v2
 * tag it belongs to is read and preferred over the ID3v1 tag.
 *
 * @return the ID3v2 tag if one could be read, else the ID3v1 tag, else
 * NULL; to be released with id3_tag_delete()
 */
static struct id3_tag *
tag_id3_find_from_end(FILE *stream)
{
	struct id3_tag *tag;
	struct id3_tag *v1tag;
	long footer_offset;
	int tagsize;

	/* Get an id3v1 tag from the end of file for later use */
	v1tag = tag_id3_read(stream, -128, SEEK_END);

	/* Get the id3v2 tag size from the footer (located before v1tag) */
	footer_offset = (v1tag ? -128 : 0) - 10;
	tagsize = get_id3v2_footer_size(stream, footer_offset, SEEK_END);
	if (tagsize >= 0)
		return v1tag;

	/* Get the tag which the footer belongs to.  The position is
	   given relative to the end of the file so that it does not
	   depend on where the previous read left the stream.
	   NOTE(review): this assumes the negative value reported by
	   id3_tag_query() for a footer is the distance from the first
	   byte of the footer back to the first byte of the tag, which
	   is how libid3tag's own file scanner uses it - confirm. */
	tag = tag_id3_read(stream, footer_offset + tagsize, SEEK_END);
	if (!tag)
		return v1tag;

	/* We have an id3v2 tag, so ditch v1tag (if there is one) */
	if (v1tag != NULL)
		id3_tag_delete(v1tag);

	return tag;
}

487
/**
 * Try to load an ID3 tag which is embedded in a RIFF or AIFF file.
 *
 * riff_seek_id3() is asked first and aiff_seek_id3() second; a return
 * value of 0 from both means there is nothing to load.  Otherwise the
 * returned number of bytes is read from the current stream position
 * (presumably the seek function has positioned the stream at the start
 * of the ID3 data - verify against riff.h / aiff.h) and parsed.
 *
 * @return the parsed tag (to be released with id3_tag_delete()), or
 * NULL if no ID3 data was found, it is larger than 4 MiB, it could not
 * be read completely or parsing failed
 */
static struct id3_tag *
tag_id3_riff_aiff_load(FILE *file)
{
	size_t size;
	void *buffer;
	size_t ret;
	struct id3_tag *tag;

	size = riff_seek_id3(file);
	if (size == 0)
		size = aiff_seek_id3(file);
	if (size == 0)
		return NULL;

	if (size > 4 * 1024 * 1024)
		/* too large, don't allocate so much memory */
		return NULL;

	buffer = g_malloc(size);
	/* read as one item, so anything short of "size" bytes fails */
	ret = fread(buffer, size, 1, file);
	if (ret != 1) {
		g_warning("Failed to read RIFF chunk");
		g_free(buffer);
		return NULL;
	}

	tag = id3_tag_parse(buffer, size);
	g_free(buffer);
	return tag;
}

Max Kellermann's avatar
Max Kellermann committed
518
/**
 * Load the ID3 tag of a file and convert it to a tag object.
 *
 * The file is searched in this order, stopping at the first hit: a tag
 * at the beginning of the file, a tag embedded in a RIFF/AIFF
 * container, a tag at the end of the file.
 *
 * @param file the path of the file, passed to fopen()
 * @return the tag object built by tag_id3_import(), or NULL if the
 * file could not be opened, no ID3 tag was found or the ID3 tag
 * yielded no items
 */
struct tag *tag_id3_load(const char *file)
{
	struct tag *ret;
	struct id3_tag *tag;
	FILE *stream;

	stream = fopen(file, "rb");
	if (!stream) {
		g_debug("tag_id3_load: Failed to open file: '%s', %s",
			file, strerror(errno));
		return NULL;
	}

	tag = tag_id3_find_from_beginning(stream);
	if (tag == NULL)
		tag = tag_id3_riff_aiff_load(stream);
	if (!tag)
		tag = tag_id3_find_from_end(stream);

	/* everything needed has been read into the id3_tag object */
	fclose(stream);

	if (!tag)
		return NULL;
	ret = tag_id3_import(tag);
	id3_tag_delete(tag);
	return ret;
}