Read id3 v2.4 tags with native Chrome Javascript/FileReader/DataView

假装没事ソ 提交于 2019-12-03 07:27:48

问题


Based on the answer of ebidel, one can read id3v1 tags by using jDataView:

// When a file is chosen, load it as an ArrayBuffer and look for an ID3v1 block.
document.querySelector('input[type="file"]').onchange = function (changeEvent) {
    var fileReader = new FileReader();

    fileReader.onload = function (loadEvent) {
        var view = new jDataView(this.result);

        // The 3-character "TAG" marker sits 128 bytes before EOF.
        // See http://en.wikipedia.org/wiki/ID3
        var marker = view.getString(3, view.byteLength - 128);
        if (marker != 'TAG') {
            // no ID3v1 data found.
            return;
        }

        // Each field is read from wherever the previous read stopped
        // (presumably what jDataView's tell() reports - confirm against its docs).
        var title = view.getString(30, view.tell());
        var artist = view.getString(30, view.tell());
        var album = view.getString(30, view.tell());
        var year = view.getString(4, view.tell());
    };

    fileReader.readAsArrayBuffer(this.files[0]);
};

Chrome and other browsers have now implemented DataView (I'm interested only in Chrome). I'm curious if someone knows how to:

  1. Read tags using the native DataView
  2. Reading id3 v2.4 tags (including APIC image 'coverart')

The point is that I have no experience with binary files, and I don't know how to jump to the correct tag position, or what little endian and big endian are. I just need an example for one tag - let's say the title, the TIT2 tag - which I hope will help me understand how to jump to the correct position and read the other tags as well:

// Placeholder FileReader `onload` handler: the body contains no executable
// code yet, only notes on what the ID3v2 reader is supposed to do.
// `this.result` is referenced in the notes as the loaded ArrayBuffer.
function readID3() {
    // DataView API reference:
    //https://developer.mozilla.org/en-US/docs/Web/API/DataView
    //and the position
    // ID3v2.4.0 frame list (TIT2, APIC, ...):
    //http://id3.org/id3v2.4.0-frames
    // Intended result shape: one entry per frame id; offset and length are still unknown.
    //var id3={};
    //id3.TIT2=new DataView(this.result,?offset?,?length?)

    /*
     Experiment (disabled): dump the first byte of the file as a character.
     ?
     var a=new DataView(this.result);
     console.dir(String.fromCharCode(a.getUint8(0)));
     ?
    */
}
// `change` handler for the file input: reads the first selected file as an
// ArrayBuffer and hands it to readID3 (invoked with the FileReader as `this`).
function readFile() {
    var reader = new FileReader();
    reader.onload = readID3;
    reader.readAsArrayBuffer(this.files[0]);
}
// Run readFile whenever the selection changes.
// NOTE(review): `fileBox` is not declared in this snippet; presumably it is the
// file <input> exposed as a global through its element id - confirm in the fiddle.
fileBox.addEventListener('change', readFile, false);

Here is the JSFiddle.


UPDATE

http://jsfiddle.net/s492L/3/

I added getString so I can read the first line and check if it contains ID3. Now I need to find the position of the first tag (TIT2) and the 'variable' length of that string & also check if it's version 2.4.

//Header
//ID3v2/file identifier    "ID3"
//ID3v2 version            $04 00
//ID3v2 flags         (%ab000000 in v2.2, %abc00000 in v2.3, %abcd0000 in v2.4.x)
//ID3v2 size                 4 * %0xxxxxxx

Possible external sources:

https://developer.mozilla.org/en-US/docs/Web/API/DataView

http://id3.org/id3v2.4.0-frames

http://id3.org/id3v2.4.0-structure

http://blog.nihilogic.dk/2008/08/reading-id3-tags-with-javascript.html

http://ericbidelman.tumblr.com/post/8343485440/reading-mp3-id3-tags-in-javascript

https://github.com/aadsm/JavaScript-ID3-Reader

I'm using the PHP getid3 lib at the moment...

http://getid3.sourceforge.net/

http://getid3.sourceforge.net/source2/module.tag.id3v2.phps


回答1:


Using the code I found here: http://www.ulduzsoft.com/2012/07/parsing-id3v2-tags-in-the-mp3-files/, I translated it into Javascript here: http://jsfiddle.net/eb7rrbw4/

Here is the code as I wrote it there:

// Returns the single byte at offset `start` as a one-character string
// (the byte value is used directly as the character code).
DataView.prototype.getChar=function(start) {
    var byteValue = this.getUint8(start);
    return String.fromCharCode(byteValue);
};
// Reads `length` consecutive bytes starting at `start` and returns them as a
// string, one character per byte (via getChar).
DataView.prototype.getString=function(start,length) {
    var text = '';
    var offset = 0;
    while (offset < length) {
        text += this.getChar(start + offset);
        offset += 1;
    }
    return text;
};
// Combines the four bytes at `start` into one integer, most significant byte
// first, with each byte contributing 7 bits of shift (the layout the ID3v2
// header uses for its "4 * %0xxxxxxx" size field).
DataView.prototype.getInt=function(start) {
    var value = 0;
    for (var index = 0; index < 4; index++) {
        value = (value << 7) | this.getUint8(start + index);
    }
    return value;
};

// FileReader `onload` handler: parses the ID3v2 tag at the start of the loaded
// ArrayBuffer (`this.result`), logs the title and artist it finds, and returns
// true when at least one of them was found (false otherwise).
//
// Relies on DataView.prototype.getChar / getString / getInt and on
// parseTextField(buffer, pos, size), all defined alongside this function.
//
// Size fields differ between versions, which is handled explicitly below:
//   - tag header size: synchsafe (7 bits per byte) in every version
//   - v2.2 frames: 3-char id + 3-byte big-endian size (6-byte frame header)
//   - v2.3 frames: 4-char id + plain 32-bit big-endian size
//   - v2.4 frames: 4-char id + synchsafe 32-bit size
function readID3(){
    var a=new DataView(this.result);

    // The fixed ID3v2 header is 10 bytes; anything shorter cannot hold a tag
    if ( a.byteLength < 10 || a.getString(0,3)!="ID3" )
    {
        return false;
    }

    // Major version: 2, 3 or 4 (ID3v2.2 / v2.3 / v2.4)
    var TagVersion = a.getUint8(3);

    // Check the version
    if ( TagVersion < 2 || TagVersion > 4 )
    {
        return false;
    }

    // Get the ID3 tag size and flags; see 3.1
    var flags = a.getUint8(5);
    var tagsize = a.getInt(6)+10;
    var uses_synch = (flags & 0x80) != 0;

    // Bit 6 means "extended header" only from v2.3 on. In v2.2 it is the
    // compression flag, and the v2.2 spec says to ignore such a tag.
    if ( TagVersion < 3 && (flags & 0x40) != 0 )
    {
        return false;
    }
    var has_extended_hdr = TagVersion >= 3 && (flags & 0x40) != 0;

    var headersize=0;
    // Read the extended header length and skip it
    if ( has_extended_hdr && a.byteLength >= 14 )
    {
        // v2.3: plain 32-bit size that excludes its own 4 bytes.
        // v2.4: synchsafe size that covers the whole extended header.
        headersize = TagVersion == 3 ? a.getUint32(10) + 4 : a.getInt(10);
    }

    // Read the whole tag (slice() clamps to the real buffer length)
    var buffer=new DataView(a.buffer.slice(10+headersize,tagsize));

    // Prepare to parse the tag
    var length = buffer.byteLength;

    // Recreate the tag if unsynchronisation is used inside; we need to replace 0xFF 0x00 with 0xFF
    // NOTE(review): v2.4 applies unsynchronisation per frame; reversing it over the
    // whole tag body is kept from the original and has not been checked against v2.4 files.
    if ( uses_synch )
    {
        var newpos = 0;
        var newbuffer = new DataView(new ArrayBuffer(length));

        // Iterate over the tag body only (it is shorter than tagsize by the header bytes)
        for ( var i = 0; i < length; i++ )
        {
            var current = buffer.getUint8(i);
            newbuffer.setUint8(newpos++,current);

            // Drop the 0x00 that was inserted after a 0xFF
            if ( current == 0xFF && i < length - 1 && buffer.getUint8(i+1) == 0 )
                i++;
        }

        length = newpos;
        buffer = newbuffer;
    }

    // Set some params
    var pos = 0;
    var ID3FrameSize = TagVersion < 3 ? 6 : 10;
    var m_title;
    var m_artist;

    // Parse the frames; stop as soon as a full frame header no longer fits
    while ( length - pos >= ID3FrameSize )
    {
        // Is there a frame? (padding is zero bytes, frame ids start with A-Z)
        var first = buffer.getChar(pos);
        if ( first < 'A' || first > 'Z' )
            break;

        // Frame name is 3 chars in pre-ID3v3 and 4 chars after
        var framename;
        var framesize;

        if ( TagVersion < 3 )
        {
            framename = buffer.getString(pos,3);
            framesize = (buffer.getUint8(pos+3) << 16) | (buffer.getUint8(pos+4) << 8) | buffer.getUint8(pos+5);
        }
        else
        {
            framename = buffer.getString(pos,4);
            framesize = TagVersion == 3 ? buffer.getUint32(pos+4) : buffer.getInt(pos+4);
        }

        // The frame body must fit completely, header included
        if ( pos + ID3FrameSize + framesize > length )
            break;

        // "TP1".."TP3" are the v2.2 ids; "TPE" is kept for backward compatibility
        if ( framename== "TPE1"  || framename== "TPE2"  || framename== "TPE3"  || framename== "TPE"
          || framename== "TP1"  || framename== "TP2"  || framename== "TP3" )
        {
            if ( m_artist == null )
                m_artist = parseTextField( buffer, pos + ID3FrameSize, framesize );
        }

        // "TT2" is the v2.2 id; "TIT" is kept for backward compatibility
        if ( framename== "TIT2" || framename== "TIT" || framename== "TT2" )
        {
            if ( m_title == null )
                m_title = parseTextField( buffer, pos + ID3FrameSize, framesize );
        }

        pos += framesize + ID3FrameSize;
    }
    console.log(m_title,m_artist);
    return m_title != null || m_artist != null;
}

// Decodes the body of an ID3v2 text frame.
//
//   buffer - DataView holding the frame
//   pos    - offset of the frame body (its first byte is the text-encoding marker)
//   size   - length of the frame body in bytes, marker included
//
// Returns the decoded text with trailing NUL terminators removed, or null when
// the body is too short to hold any text (size < 2).
//
// Encoding markers as defined by the ID3v2 specs:
//   0 = ISO-8859-1, 1 = UTF-16 with BOM, 2 = UTF-16BE without BOM, 3 = UTF-8.
// Any other marker is read byte-per-character, as the previous version did for
// every frame.
function parseTextField( buffer, pos, size )
{
    if ( size < 2 )
        return null;

    var charcode = buffer.getUint8(pos);
    var start = pos + 1;
    var end = pos + size;
    var text = '';
    var i;

    if ( charcode == 3 )
    {
        // UTF-8: decode the raw bytes in place (honouring the view's own byteOffset)
        var bytes = new Uint8Array( buffer.buffer, buffer.byteOffset + start, end - start );
        text = new TextDecoder('utf-8').decode( bytes );
    }
    else if ( charcode == 1 || charcode == 2 )
    {
        // UTF-16: marker 2 is always big-endian; marker 1 announces its byte order
        // with a BOM. Without a BOM big-endian is assumed.
        var littleEndian = false;
        if ( charcode == 1 && end - start >= 2 )
        {
            var bom = buffer.getUint16(start);
            if ( bom == 0xFFFE )
            {
                littleEndian = true;
                start += 2;
            }
            else if ( bom == 0xFEFF )
            {
                start += 2;
            }
        }

        // A dangling odd byte at the end is ignored
        for ( i = start; i + 1 < end; i += 2 )
            text += String.fromCharCode( buffer.getUint16(i, littleEndian) );
    }
    else
    {
        // ISO-8859-1: every byte is its own code point
        for ( i = start; i < end; i++ )
            text += String.fromCharCode( buffer.getUint8(i) );
    }

    // Strings may be NUL-terminated; the terminator is not part of the text
    return text.replace(/\0+$/, '');
}

You should see the title and author in the console log. Look at the parse text function, though, where the encoding determines the way to read the string. (search for TODO). Also I have not tested it with extended headers or uses_synch true or tag version 3.




回答2:


You can try using id3 parser on github.

Here's your updated fiddle that logs the tags object in the console

With the id3.js included, all you need to do in your code is this:

function readFile(){
   id3(this.files[0], function(err, tags) {
       console.log(tags);
   })
}
document.getElementsByTagName('input')[0].addEventListener('change',readFile,false);

And here is the tags object as created by id3:

{
  "title": "Stairway To Heaven",
  "album": "Stairway To Heaven",
  "artist": "Led Zeppelin",
  "year": "1999",
  "v1": {
    "title": "Stairway To Heaven",
    "artist": "Led Zeppelin",
    "album": "Stairway To Heaven",
    "year": "1999",
    "comment": "Classic Rock",
    "track": 13,
    "version": 1.1,
    "genre": "Other"
  },
  "v2": {
    "version": [3, 0],
    "title": "Stairway To Heaven",
    "album": "Stairway To Heaven",
    "comments": "Classic Rock",
    "publisher": "Virgin Records"
  }
}

Hope this helps!




回答3:


Partially Correct Answer (it properly reads utf8 formatted id3v2.4.0 including cover)

The things i asked in my question probably work now.

I wanted a very crude minimal function set to handle only id3v2.4.0 & and also parse the attached image.

With the help of @Siderite Zackwehdex, whose answer is marked as correct, I understood the important part of the code that was missing.

As i had some time to play with it i made various modifications to the code.

First of all sorry for the compressed script but i have a better overview of the overall code. it's easier for me. if you have some questions about the code just ask.

Anyway, I removed the uses_synch handling ... it's really hard to find a file that uses unsynchronisation. The same goes for has_extended_hdr. I also removed the support for id3v2.0.0 to id3v2.2.0. I added a version check that works with all id3v2 subversions.

The main function's output is an object containing all the tags; inside it you can also find the id3v2 version. Last, and I guess useful for extending it, I added a custom FRAME object that contains custom functions for frames other than text frames. The only function in it for now converts the image/cover/APIC frame to an easy-to-use base64 string. That way the result can be stored as a JSON string.

While compatibility is important for some of you, the above-mentioned extended header and unsynchronisation are actually the smallest problem.

PROBLEMS

The encoding needs to be UTF-8, otherwise you get strange text padding and some images are parsed only partially, i.e. they are basically broken.

I want to avoid the use of external library or even a really big function just for that ... there needs to be some smart simple solution to handle properly the encoding. ISO-8859-1,UTF-8,UTF-16 .. big endian... whatever... #00 vs #00 00 ..

If that is done the support can be improved exponentially.

I hope that some of you have a solution for that.

CODE

// Reads `b` bytes starting at offset `a` and returns them as a string, one
// character per byte. A missing or zero `b` reads a single byte.
// `c` and `d` are placeholders kept for signature compatibility; any values
// passed for them are ignored.
DataView.prototype.str=function(a,b,c,d){//start,length,placeholder,placeholder
 var count=b||1;
 var text='';
 var index=0;
 while(index<count){
  text+=String.fromCharCode(this.getUint8(a+index));
  index+=1;
 }
 return text
}
// Combines the four bytes at offset `a` into one integer, most significant
// byte first, shifting by 7 bits per byte (21, 14, 7, 0).
DataView.prototype.int=function(a){//start
 var first=this.getUint8(a);
 var second=this.getUint8(a+1);
 var third=this.getUint8(a+2);
 var fourth=this.getUint8(a+3);
 return (first<<21)|(second<<14)|(third<<7)|fourth
}
// Custom parsers for frames that are not plain text, keyed by frame id.
// Each parser is called as fn(x, y, z, q):
//   x - DataView over the tag body
//   y - offset of the frame (start of its header) inside x
//   z - frame header length in bytes
//   q - frame body length in bytes
var frID3={
 // APIC (attached picture). Body layout per the ID3v2 frame spec:
 //   text-encoding byte, MIME type (Latin-1, NUL-terminated), picture-type byte,
 //   description (in the given encoding, NUL-terminated), picture data.
 // Returns {mime, description, type, base64}; base64 is a data: URL.
 'APIC':function(x,y,z,q){
  var start=y+z;                            // first body byte
  var end=Math.min(start+q,x.byteLength);   // absolute end of the body, clamped to the view
  var enc=end>start?x.getUint8(start):0;
  var p=start+1;
  var mime='',desc='',type=0,code,from,little=false;

  // MIME type: stop at the terminator without storing it
  while(p<end&&(code=x.getUint8(p++))!=0)mime+=String.fromCharCode(code);

  // Picture type: one byte, consumed so it does not leak into the description
  if(p<end)type=x.getUint8(p++);

  if(enc==1||enc==2){
   // UTF-16: 2-byte units and a 2-byte terminator; encoding 1 carries a BOM
   if(enc==1&&p+1<end){
    code=x.getUint16(p);
    if(code==0xFFFE){little=true;p+=2}
    else if(code==0xFEFF)p+=2;
   }
   while(p+1<end&&(code=x.getUint16(p,little))!=0){desc+=String.fromCharCode(code);p+=2}
   p=Math.min(p+2,end);
  }else{
   from=p;
   while(p<end&&x.getUint8(p)!=0)p++;
   if(enc==3)desc=new TextDecoder('utf-8').decode(new Uint8Array(x.buffer,x.byteOffset+from,p-from));
   else for(;from<p;from++)desc+=String.fromCharCode(x.getUint8(from));
   p=Math.min(p+1,end);
  }

  // Picture data runs to the end of the frame body. Convert it in chunks:
  // passing a whole image to fromCharCode.apply can exceed the argument limit.
  var img=new Uint8Array(x.buffer,x.byteOffset+p,end-p),bin='',i;
  for(i=0;i<img.length;i+=0x8000)bin+=String.fromCharCode.apply(null,img.subarray(i,i+0x8000));

  return {mime:mime,description:desc,type:type,base64:'data:'+mime+';base64,'+btoa(bin)}
 }
}
// FileReader `onload` handler: parses the ID3v2.3/2.4 tag at the start of the
// loaded ArrayBuffer (`this.result`) and logs an object holding the tag
// version plus one entry per frame id.
//
// All parameters are placeholders used as locals (the event object passed by
// the FileReader is ignored). Relies on DataView.prototype.str / int and on
// the frID3 table of custom frame parsers defined alongside this function.
//
// Frames without a custom parser are stored as raw byte strings, so a text
// frame still starts with its text-encoding byte.
function readID3(a,b,c,d,e,f,g,h){
 a=new DataView(this.result);
 // The fixed tag header is 10 bytes long
 if(a.byteLength<10||a.str(0,3)!='ID3')return;
 var major=a.getUint8(3);
 g={Version:'ID3v2.'+major+'.'+a.getUint8(4)};
 // Cut out the tag body, skipping the extended header when its flag (0x40) is set
 a=new DataView(a.buffer.slice(10+((a.getUint8(5)&0x40)!=0&&a.byteLength>=14?a.int(10):0),a.int(6)+10));
 b=a.byteLength;c=0;d=10;
 // Only v2.3+ uses the 10-byte frame header handled here. Check that a whole
 // header is left BEFORE reading it, so a tag without padding ends cleanly.
 while(major>=3&&b-c>=d){
  f=a.str(c);
  if(f<'A'||f>'Z')break; // padding or garbage: no more frames
  // v2.3 stores a plain 32-bit size, v2.4 a synchsafe one
  e=major==3?a.getUint32(c+4):a.int(c+4);
  if(c+d+e>b)break; // frame body would run past the tag
  h=a.str(c,4);
  // str() reads at least one byte, so an empty frame is handled explicitly
  g[h]=frID3[h]?frID3[h](a,c,d,e):(e>0?a.str(c+d,e):'');
  c+=e+d;
 }
 console.log(g);
}

DEMO

https://jsfiddle.net/2awq6pz7/



来源:https://stackoverflow.com/questions/20212560/read-id3-v2-4-tags-with-native-chrome-javascript-filereader-dataview

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!